forked from mirrors/linux
		
	hugetlb_cgroup: add accounting for shared mappings
For shared mappings, the pointer to the hugetlb_cgroup to uncharge lives in the resv_map entries, in file_region->reservation_counter. After a call to region_chg, we charge the appropriate hugetlb_cgroup, and if successful, we pass on the hugetlb_cgroup info to a follow-up region_add call. When a file_region entry is added to the resv_map via region_add, we put the pointer to that cgroup in file_region->reservation_counter. If charging doesn't succeed, we report the error to the caller, so that the kernel fails the reservation. On region_del, which is when the hugetlb memory is unreserved, we also uncharge the file_region->reservation_counter. [akpm@linux-foundation.org: forward declare struct file_region] Signed-off-by: Mina Almasry <almasrymina@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: David Rientjes <rientjes@google.com> Cc: Greg Thelen <gthelen@google.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Sandipan Das <sandipan@linux.ibm.com> Cc: Shakeel Butt <shakeelb@google.com> Cc: Shuah Khan <shuah@kernel.org> Link: http://lkml.kernel.org/r/20200211213128.73302-5-almasrymina@google.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									0db9d74ed8
								
							
						
					
					
						commit
						075a61d07a
					
				
					 4 changed files with 155 additions and 54 deletions
				
			
		|  | @ -57,6 +57,41 @@ struct resv_map { | |||
| 	struct cgroup_subsys_state *css; | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Region tracking -- allows tracking of reservations and instantiated pages | ||||
|  *                    across the pages in a mapping. | ||||
|  * | ||||
|  * The region data structures are embedded into a resv_map and protected | ||||
|  * by a resv_map's lock.  The set of regions within the resv_map represent | ||||
|  * reservations for huge pages, or huge pages that have already been | ||||
|  * instantiated within the map.  The from and to elements are huge page | ||||
|  * indicies into the associated mapping.  from indicates the starting index | ||||
|  * of the region.  to represents the first index past the end of  the region. | ||||
|  * | ||||
|  * For example, a file region structure with from == 0 and to == 4 represents | ||||
|  * four huge pages in a mapping.  It is important to note that the to element | ||||
|  * represents the first element past the end of the region. This is used in | ||||
|  * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. | ||||
|  * | ||||
|  * Interval notation of the form [from, to) will be used to indicate that | ||||
|  * the endpoint from is inclusive and to is exclusive. | ||||
|  */ | ||||
| struct file_region { | ||||
| 	struct list_head link; | ||||
| 	long from; | ||||
| 	long to; | ||||
| #ifdef CONFIG_CGROUP_HUGETLB | ||||
| 	/*
 | ||||
| 	 * On shared mappings, each reserved region appears as a struct | ||||
| 	 * file_region in resv_map. These fields hold the info needed to | ||||
| 	 * uncharge each reservation. | ||||
| 	 */ | ||||
| 	struct page_counter *reservation_counter; | ||||
| 	struct cgroup_subsys_state *css; | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| extern struct resv_map *resv_map_alloc(void); | ||||
| void resv_map_release(struct kref *ref); | ||||
| 
 | ||||
|  |  | |||
|  | @ -19,6 +19,7 @@ | |||
| 
 | ||||
| struct hugetlb_cgroup; | ||||
| struct resv_map; | ||||
| struct file_region; | ||||
| 
 | ||||
| /*
 | ||||
|  * Minimum page order trackable by hugetlb cgroup. | ||||
|  | @ -135,11 +136,21 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, | |||
| 					    unsigned long start, | ||||
| 					    unsigned long end); | ||||
| 
 | ||||
| extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, | ||||
| 						struct file_region *rg, | ||||
| 						unsigned long nr_pages); | ||||
| 
 | ||||
| extern void hugetlb_cgroup_file_init(void) __init; | ||||
| extern void hugetlb_cgroup_migrate(struct page *oldhpage, | ||||
| 				   struct page *newhpage); | ||||
| 
 | ||||
| #else | ||||
/*
 * Stub variant living in the #else branch of the surrounding conditional:
 * it accepts the same arguments as the real implementation and does
 * nothing.
 */
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
						       struct file_region *rg,
						       unsigned long nr_pages)
{
}
| 
 | ||||
| static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) | ||||
| { | ||||
| 	return NULL; | ||||
|  |  | |||
							
								
								
									
										148
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							
							
						
						
									
										148
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							|  | @ -220,31 +220,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) | |||
| 	return subpool_inode(file_inode(vma->vm_file)); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Region tracking -- allows tracking of reservations and instantiated pages | ||||
|  *                    across the pages in a mapping. | ||||
|  * | ||||
|  * The region data structures are embedded into a resv_map and protected | ||||
|  * by a resv_map's lock.  The set of regions within the resv_map represent | ||||
|  * reservations for huge pages, or huge pages that have already been | ||||
|  * instantiated within the map.  The from and to elements are huge page | ||||
|  * indicies into the associated mapping.  from indicates the starting index | ||||
|  * of the region.  to represents the first index past the end of  the region. | ||||
|  * | ||||
|  * For example, a file region structure with from == 0 and to == 4 represents | ||||
|  * four huge pages in a mapping.  It is important to note that the to element | ||||
|  * represents the first element past the end of the region. This is used in | ||||
|  * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. | ||||
|  * | ||||
|  * Interval notation of the form [from, to) will be used to indicate that | ||||
|  * the endpoint from is inclusive and to is exclusive. | ||||
|  */ | ||||
| struct file_region { | ||||
| 	struct list_head link; | ||||
| 	long from; | ||||
| 	long to; | ||||
| }; | ||||
| 
 | ||||
| /* Helper that removes a struct file_region from the resv_map cache and returns
 | ||||
|  * it for use. | ||||
|  */ | ||||
|  | @ -266,6 +241,41 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to) | |||
| 	return nrg; | ||||
| } | ||||
| 
 | ||||
/*
 * Helper that duplicates the hugetlb_cgroup uncharge info of @rg into @nrg:
 * both the reservation_counter and the css pointer are copied.  When @rg
 * carries a non-NULL css, an additional reference is taken on it with
 * css_get().  Without CONFIG_CGROUP_HUGETLB the body is empty.
 */
static void copy_hugetlb_cgroup_uncharge_info(struct file_region *nrg,
					      struct file_region *rg)
{
#ifdef CONFIG_CGROUP_HUGETLB
	struct cgroup_subsys_state *css = rg->css;

	nrg->reservation_counter = rg->reservation_counter;
	nrg->css = css;
	if (css)
		css_get(css);
#endif
}
| 
 | ||||
/*
 * Helper that records hugetlb_cgroup uncharge info in @nrg.
 *
 * With a non-NULL @h_cg, @nrg is pointed at the rsvd_hugepage counter of
 * @h_cg for the hstate @h and at the css of @h_cg; @resv->pages_per_hpage
 * is filled in from @h if it is still zero.  With a NULL @h_cg, both
 * uncharge fields of @nrg are cleared.  Without CONFIG_CGROUP_HUGETLB the
 * body is empty.
 */
static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
						struct hstate *h,
						struct resv_map *resv,
						struct file_region *nrg)
{
#ifdef CONFIG_CGROUP_HUGETLB
	if (!h_cg) {
		/* No cgroup given: clear the uncharge info. */
		nrg->reservation_counter = NULL;
		nrg->css = NULL;
		return;
	}

	nrg->reservation_counter = &h_cg->rsvd_hugepage[hstate_index(h)];
	nrg->css = &h_cg->css;

	/* Initialize pages_per_hpage the first time it is found unset. */
	if (!resv->pages_per_hpage)
		resv->pages_per_hpage = pages_per_huge_page(h);

	/*
	 * pages_per_hpage should be the same for all entries in
	 * a resv_map.
	 */
	VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h));
#endif
}
| 
 | ||||
| /* Must be called with resv->lock held. Calling this with count_only == true
 | ||||
|  * will count the number of pages to be added but will not modify the linked | ||||
|  * list. If regions_needed != NULL and count_only == true, then regions_needed | ||||
|  | @ -273,7 +283,9 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to) | |||
|  * add the regions for this range. | ||||
|  */ | ||||
| static long add_reservation_in_range(struct resv_map *resv, long f, long t, | ||||
| 				     long *regions_needed, bool count_only) | ||||
| 				     struct hugetlb_cgroup *h_cg, | ||||
| 				     struct hstate *h, long *regions_needed, | ||||
| 				     bool count_only) | ||||
| { | ||||
| 	long add = 0; | ||||
| 	struct list_head *head = &resv->regions; | ||||
|  | @ -312,6 +324,8 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, | |||
| 			if (!count_only) { | ||||
| 				nrg = get_file_region_entry_from_cache( | ||||
| 					resv, last_accounted_offset, rg->from); | ||||
| 				record_hugetlb_cgroup_uncharge_info(h_cg, h, | ||||
| 								    resv, nrg); | ||||
| 				list_add(&nrg->link, rg->link.prev); | ||||
| 			} else if (regions_needed) | ||||
| 				*regions_needed += 1; | ||||
|  | @ -328,6 +342,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, | |||
| 		if (!count_only) { | ||||
| 			nrg = get_file_region_entry_from_cache( | ||||
| 				resv, last_accounted_offset, t); | ||||
| 			record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg); | ||||
| 			list_add(&nrg->link, rg->link.prev); | ||||
| 		} else if (regions_needed) | ||||
| 			*regions_needed += 1; | ||||
|  | @ -416,7 +431,8 @@ static int allocate_file_region_entries(struct resv_map *resv, | |||
|  * 1 page will only require at most 1 entry. | ||||
|  */ | ||||
| static long region_add(struct resv_map *resv, long f, long t, | ||||
| 		       long in_regions_needed) | ||||
| 		       long in_regions_needed, struct hstate *h, | ||||
| 		       struct hugetlb_cgroup *h_cg) | ||||
| { | ||||
| 	long add = 0, actual_regions_needed = 0; | ||||
| 
 | ||||
|  | @ -424,7 +440,8 @@ static long region_add(struct resv_map *resv, long f, long t, | |||
| retry: | ||||
| 
 | ||||
| 	/* Count how many regions are actually needed to execute this add. */ | ||||
| 	add_reservation_in_range(resv, f, t, &actual_regions_needed, true); | ||||
| 	add_reservation_in_range(resv, f, t, NULL, NULL, &actual_regions_needed, | ||||
| 				 true); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Check for sufficient descriptors in the cache to accommodate | ||||
|  | @ -452,7 +469,7 @@ static long region_add(struct resv_map *resv, long f, long t, | |||
| 		goto retry; | ||||
| 	} | ||||
| 
 | ||||
| 	add = add_reservation_in_range(resv, f, t, NULL, false); | ||||
| 	add = add_reservation_in_range(resv, f, t, h_cg, h, NULL, false); | ||||
| 
 | ||||
| 	resv->adds_in_progress -= in_regions_needed; | ||||
| 
 | ||||
|  | @ -489,7 +506,8 @@ static long region_chg(struct resv_map *resv, long f, long t, | |||
| 	spin_lock(&resv->lock); | ||||
| 
 | ||||
| 	/* Count how many hugepages in this range are NOT represented. */ | ||||
| 	chg = add_reservation_in_range(resv, f, t, out_regions_needed, true); | ||||
| 	chg = add_reservation_in_range(resv, f, t, NULL, NULL, | ||||
| 				       out_regions_needed, true); | ||||
| 
 | ||||
| 	if (*out_regions_needed == 0) | ||||
| 		*out_regions_needed = 1; | ||||
|  | @ -589,11 +607,17 @@ static long region_del(struct resv_map *resv, long f, long t) | |||
| 			/* New entry for end of split region */ | ||||
| 			nrg->from = t; | ||||
| 			nrg->to = rg->to; | ||||
| 
 | ||||
| 			copy_hugetlb_cgroup_uncharge_info(nrg, rg); | ||||
| 
 | ||||
| 			INIT_LIST_HEAD(&nrg->link); | ||||
| 
 | ||||
| 			/* Original entry is trimmed */ | ||||
| 			rg->to = f; | ||||
| 
 | ||||
| 			hugetlb_cgroup_uncharge_file_region( | ||||
| 				resv, rg, nrg->to - nrg->from); | ||||
| 
 | ||||
| 			list_add(&nrg->link, &rg->link); | ||||
| 			nrg = NULL; | ||||
| 			break; | ||||
|  | @ -601,6 +625,8 @@ static long region_del(struct resv_map *resv, long f, long t) | |||
| 
 | ||||
| 		if (f <= rg->from && t >= rg->to) { /* Remove entire region */ | ||||
| 			del += rg->to - rg->from; | ||||
| 			hugetlb_cgroup_uncharge_file_region(resv, rg, | ||||
| 							    rg->to - rg->from); | ||||
| 			list_del(&rg->link); | ||||
| 			kfree(rg); | ||||
| 			continue; | ||||
|  | @ -609,9 +635,15 @@ static long region_del(struct resv_map *resv, long f, long t) | |||
| 		if (f <= rg->from) {	/* Trim beginning of region */ | ||||
| 			del += t - rg->from; | ||||
| 			rg->from = t; | ||||
| 
 | ||||
| 			hugetlb_cgroup_uncharge_file_region(resv, rg, | ||||
| 							    t - rg->from); | ||||
| 		} else {		/* Trim end of region */ | ||||
| 			del += rg->to - f; | ||||
| 			rg->to = f; | ||||
| 
 | ||||
| 			hugetlb_cgroup_uncharge_file_region(resv, rg, | ||||
| 							    rg->to - f); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|  | @ -2124,7 +2156,7 @@ static long __vma_reservation_common(struct hstate *h, | |||
| 		VM_BUG_ON(dummy_out_regions_needed != 1); | ||||
| 		break; | ||||
| 	case VMA_COMMIT_RESV: | ||||
| 		ret = region_add(resv, idx, idx + 1, 1); | ||||
| 		ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); | ||||
| 		/* region_add calls of range 1 should never fail. */ | ||||
| 		VM_BUG_ON(ret < 0); | ||||
| 		break; | ||||
|  | @ -2134,7 +2166,7 @@ static long __vma_reservation_common(struct hstate *h, | |||
| 		break; | ||||
| 	case VMA_ADD_RESV: | ||||
| 		if (vma->vm_flags & VM_MAYSHARE) { | ||||
| 			ret = region_add(resv, idx, idx + 1, 1); | ||||
| 			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); | ||||
| 			/* region_add calls of range 1 should never fail. */ | ||||
| 			VM_BUG_ON(ret < 0); | ||||
| 		} else { | ||||
|  | @ -4830,7 +4862,7 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 	struct hstate *h = hstate_inode(inode); | ||||
| 	struct hugepage_subpool *spool = subpool_inode(inode); | ||||
| 	struct resv_map *resv_map; | ||||
| 	struct hugetlb_cgroup *h_cg; | ||||
| 	struct hugetlb_cgroup *h_cg = NULL; | ||||
| 	long gbl_reserve, regions_needed = 0; | ||||
| 
 | ||||
| 	/* This should never happen */ | ||||
|  | @ -4871,19 +4903,6 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 
 | ||||
| 		chg = to - from; | ||||
| 
 | ||||
| 		if (hugetlb_cgroup_charge_cgroup_rsvd( | ||||
| 			    hstate_index(h), chg * pages_per_huge_page(h), | ||||
| 			    &h_cg)) { | ||||
| 			kref_put(&resv_map->refs, resv_map_release); | ||||
| 			return -ENOMEM; | ||||
| 		} | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Since this branch handles private mappings, we attach the | ||||
| 		 * counter to uncharge for this reservation off resv_map. | ||||
| 		 */ | ||||
| 		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h); | ||||
| 
 | ||||
| 		set_vma_resv_map(vma, resv_map); | ||||
| 		set_vma_resv_flags(vma, HPAGE_RESV_OWNER); | ||||
| 	} | ||||
|  | @ -4893,6 +4912,21 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 		goto out_err; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = hugetlb_cgroup_charge_cgroup_rsvd( | ||||
| 		hstate_index(h), chg * pages_per_huge_page(h), &h_cg); | ||||
| 
 | ||||
| 	if (ret < 0) { | ||||
| 		ret = -ENOMEM; | ||||
| 		goto out_err; | ||||
| 	} | ||||
| 
 | ||||
| 	if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) { | ||||
| 		/* For private mappings, the hugetlb_cgroup uncharge info hangs
 | ||||
| 		 * off the resv_map. | ||||
| 		 */ | ||||
| 		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * There must be enough pages in the subpool for the mapping. If | ||||
| 	 * the subpool has a minimum size, there may be some global | ||||
|  | @ -4901,7 +4935,7 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 	gbl_reserve = hugepage_subpool_get_pages(spool, chg); | ||||
| 	if (gbl_reserve < 0) { | ||||
| 		ret = -ENOSPC; | ||||
| 		goto out_err; | ||||
| 		goto out_uncharge_cgroup; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -4910,9 +4944,7 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 	 */ | ||||
| 	ret = hugetlb_acct_memory(h, gbl_reserve); | ||||
| 	if (ret < 0) { | ||||
| 		/* put back original number of pages, chg */ | ||||
| 		(void)hugepage_subpool_put_pages(spool, chg); | ||||
| 		goto out_err; | ||||
| 		goto out_put_pages; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -4927,13 +4959,11 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 	 * else has to be done for private mappings here | ||||
| 	 */ | ||||
| 	if (!vma || vma->vm_flags & VM_MAYSHARE) { | ||||
| 		add = region_add(resv_map, from, to, regions_needed); | ||||
| 		add = region_add(resv_map, from, to, regions_needed, h, h_cg); | ||||
| 
 | ||||
| 		if (unlikely(add < 0)) { | ||||
| 			hugetlb_acct_memory(h, -gbl_reserve); | ||||
| 			/* put back original number of pages, chg */ | ||||
| 			(void)hugepage_subpool_put_pages(spool, chg); | ||||
| 			goto out_err; | ||||
| 			goto out_put_pages; | ||||
| 		} else if (unlikely(chg > add)) { | ||||
| 			/*
 | ||||
| 			 * pages in this range were added to the reserve | ||||
|  | @ -4944,12 +4974,22 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
| 			 */ | ||||
| 			long rsv_adjust; | ||||
| 
 | ||||
| 			hugetlb_cgroup_uncharge_cgroup_rsvd( | ||||
| 				hstate_index(h), | ||||
| 				(chg - add) * pages_per_huge_page(h), h_cg); | ||||
| 
 | ||||
| 			rsv_adjust = hugepage_subpool_put_pages(spool, | ||||
| 								chg - add); | ||||
| 			hugetlb_acct_memory(h, -rsv_adjust); | ||||
| 		} | ||||
| 	} | ||||
| 	return 0; | ||||
| out_put_pages: | ||||
| 	/* put back original number of pages, chg */ | ||||
| 	(void)hugepage_subpool_put_pages(spool, chg); | ||||
| out_uncharge_cgroup: | ||||
| 	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), | ||||
| 					    chg * pages_per_huge_page(h), h_cg); | ||||
| out_err: | ||||
| 	if (!vma || vma->vm_flags & VM_MAYSHARE) | ||||
| 		/* Only call region_abort if the region_chg succeeded but the
 | ||||
|  |  | |||
|  | @ -391,6 +391,21 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, | |||
| 	css_put(resv->css); | ||||
| } | ||||
| 
 | ||||
| void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, | ||||
| 					 struct file_region *rg, | ||||
| 					 unsigned long nr_pages) | ||||
| { | ||||
| 	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 && | ||||
| 	    !resv->reservation_counter) { | ||||
| 		page_counter_uncharge(rg->reservation_counter, | ||||
| 				      nr_pages * resv->pages_per_hpage); | ||||
| 		css_put(rg->css); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| enum { | ||||
| 	RES_USAGE, | ||||
| 	RES_RSVD_USAGE, | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Mina Almasry
						Mina Almasry