Problem: On large page size configurations (16KiB, 64KiB), the CMA
alignment requirement (CMA_MIN_ALIGNMENT_BYTES) increases considerably,
and this causes the CMA reservations to be larger than necessary. This
means that the system will have fewer available MIGRATE_UNMOVABLE and
MIGRATE_RECLAIMABLE page blocks, since MIGRATE_CMA can't fall back to
them. CMA_MIN_ALIGNMENT_BYTES increases because it depends on
MAX_PAGE_ORDER, which in turn depends on ARCH_FORCE_MAX_ORDER. The
value of ARCH_FORCE_MAX_ORDER increases on 16KiB and 64KiB kernels.
For example, on ARM, the CMA alignment requirement when:
- CONFIG_ARCH_FORCE_MAX_ORDER default value is used
- CONFIG_TRANSPARENT_HUGEPAGE is set:

PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES
-----------------------------------------------------------------------
   4KiB   |       10       |        9        |  4KiB * (2 ^  9) =   2MiB
  16KiB   |       11       |       11        | 16KiB * (2 ^ 11) =  32MiB
  64KiB   |       13       |       13        | 64KiB * (2 ^ 13) = 512MiB
There are some extreme cases for the CMA alignment requirement when:
- CONFIG_ARCH_FORCE_MAX_ORDER maximum value is set
- CONFIG_TRANSPARENT_HUGEPAGE is NOT set
- CONFIG_HUGETLB_PAGE is NOT set:

PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES
------------------------------------------------------------------------
   4KiB   |       15       |       15        |  4KiB * (2 ^ 15) = 128MiB
  16KiB   |       13       |       13        | 16KiB * (2 ^ 13) = 128MiB
  64KiB   |       13       |       13        | 64KiB * (2 ^ 13) = 512MiB
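Both tables fall out of the same relationship: CMA_MIN_ALIGNMENT_BYTES
is PAGE_SIZE multiplied by pageblock_nr_pages (see CMA_MIN_ALIGNMENT_PAGES
and CMA_MIN_ALIGNMENT_BYTES in include/linux/cma.h). A minimal userspace
sketch of that arithmetic, reproducing the first table (illustration
only, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		/* PAGE_SIZE / pageblock_order pairs from the first table */
		const unsigned long page_sizes[] = { 4UL << 10, 16UL << 10, 64UL << 10 };
		const unsigned int orders[] = { 9, 11, 13 };

		for (int i = 0; i < 3; i++) {
			/* CMA_MIN_ALIGNMENT_BYTES = PAGE_SIZE * 2^pageblock_order */
			unsigned long align = page_sizes[i] << orders[i];

			printf("%2luKiB pages, order %2u -> %3lu MiB\n",
			       page_sizes[i] >> 10, orders[i], align >> 20);
		}
		return 0;
	}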
This affects the CMA reservations for drivers. If a driver needs 4MiB
of CMA memory on a 4KiB kernel, the minimal reservation on a 16KiB
kernel has to be 32MiB due to the alignment requirement:

On a 4KiB kernel:

	reserved-memory {
		...
		cma_test_reserve: cma_test_reserve {
			compatible = "shared-dma-pool";
			size = <0x0 0x400000>; /* 4 MiB */
			...
		};
	};

On a 16KiB kernel:

	reserved-memory {
		...
		cma_test_reserve: cma_test_reserve {
			compatible = "shared-dma-pool";
			size = <0x0 0x2000000>; /* 32 MiB */
			...
		};
	};
Solution: Add a new config CONFIG_PAGE_BLOCK_ORDER that allows setting
the page block order on all architectures. The maximum page block order
is given by ARCH_FORCE_MAX_ORDER.

By default, CONFIG_PAGE_BLOCK_ORDER has the same value as
ARCH_FORCE_MAX_ORDER, which makes sure that current kernel
configurations won't be affected by this change; it is an opt-in change.

This patch allows large page size kernels (16KiB, 64KiB) to have the
same CMA alignment requirement as 4KiB kernels by setting a lower
pageblock_order.
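For example (a usage sketch; the value 7 is the one exercised in the
benchmarks below), a 16KiB kernel that wants the 4KiB-kernel CMA
alignment of 2MiB (16KiB * 2^7 = 2MiB) would set:

	CONFIG_PAGE_BLOCK_ORDER=7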
Tests:
- Verified that HugeTLB pages work when pageblock_order is 1, 7, 10 on
  4KiB and 16KiB kernels.
- Verified that Transparent Huge Pages work when pageblock_order is 1,
  7, 10 on 4KiB and 16KiB kernels.
- Verified that dma-buf heap allocations work when pageblock_order is
  1, 7, 10 on 4KiB and 16KiB kernels.
Benchmarks:
The benchmarks compare 16KiB kernels with pageblock_order 10 and 7.
pageblock_order 7 was chosen because this value makes the minimum CMA
alignment requirement the same as on 4KiB kernels (2MiB).
- Perform 100K dma-buf heap (/dev/dma_heap/system) allocations of
  SZ_8M, SZ_4M, SZ_2M, SZ_1M, SZ_64, SZ_8, SZ_4. Use simpleperf
  (https://developer.android.com/ndk/guides/simpleperf) to measure the
  number of instructions and page faults on 16KiB kernels. The
  benchmark was executed 10 times; a minimal sketch of a single
  allocation follows the benchmark results below. The averages are:
# instructions | #page-faults
order 10 | order 7 | order 10 | order 7
--------------------------------------------------------
13,891,765,770 | 11,425,777,314 | 220 | 217
14,456,293,487 | 12,660,819,302 | 224 | 219
13,924,261,018 | 13,243,970,736 | 217 | 221
13,910,886,504 | 13,845,519,630 | 217 | 221
14,388,071,190 | 13,498,583,098 | 223 | 224
13,656,442,167 | 12,915,831,681 | 216 | 218
13,300,268,343 | 12,930,484,776 | 222 | 218
13,625,470,223 | 14,234,092,777 | 219 | 218
13,508,964,965 | 13,432,689,094 | 225 | 219
13,368,950,667 | 13,683,587,37 | 219 | 225
-------------------------------------------------------------------
13,803,137,433 | 13,131,974,268 | 220 | 220 Averages
There were 4.86% fewer instructions when the order was 7, in comparison
with order 10:
13,131,974,268 - 13,803,137,433 = -671,163,165 (-4.86%)
The average number of page faults was the same for order 7 and order 10.
These results didn't show any significant regression when
pageblock_order is set to 7 on 16KiB kernels.
- Run Speedometer 3.1 (https://browserbench.org/Speedometer3.1/) 5 times
  on the 16KiB kernels with pageblock_order 7 and 10.
order 10 | order 7 | order 7 - order 10 | (order 7 - order 10) %
-------------------------------------------------------------------
15.8 | 16.4 | 0.6 | 3.80%
16.4 | 16.2 | -0.2 | -1.22%
16.6 | 16.3 | -0.3 | -1.81%
16.8 | 16.3 | -0.5 | -2.98%
16.6 | 16.8 | 0.2 | 1.20%
-------------------------------------------------------------------
16.44 | 16.4 | -0.04 | -0.24% Averages
The results didn't show any significant regression when pageblock_order
is set to 7 on 16KiB kernels.
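For reference, each allocation in the dma-buf heap benchmark above
boils down to the dma-buf heap UAPI; a minimal sketch of a single
allocation (error handling trimmed; the 100K-iteration loop over the
listed sizes and the simpleperf wrapping are omitted):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/dma-heap.h>

	/* Allocate one buffer from the system dma-buf heap. */
	static int dma_heap_alloc(int heap_fd, unsigned long len)
	{
		struct dma_heap_allocation_data data = {
			.len = len,
			.fd_flags = O_RDWR | O_CLOEXEC,
		};

		if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data) < 0)
			return -1;
		return data.fd;	/* dma-buf fd backing the buffer */
	}

	int main(void)
	{
		int heap_fd = open("/dev/dma_heap/system", O_RDONLY | O_CLOEXEC);
		int buf_fd;

		if (heap_fd < 0)
			return 1;
		buf_fd = dma_heap_alloc(heap_fd, 8UL << 20);	/* SZ_8M */
		if (buf_fd >= 0)
			close(buf_fd);
		close(heap_fd);
		return buf_fd < 0;
	}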
Link: https://lkml.kernel.org/r/20250521215807.1860663-1-jyescas@google.com
Signed-off-by: Juan Yescas <jyescas@google.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/pageblock-flags.h (102 lines, C):
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Macros for manipulating and testing flags related to a
 * pageblock_nr_pages number of pages.
 *
 * Copyright (C) IBM Corporation, 2006
 *
 * Original author, Mel Gorman
 * Major cleanups and reduction of bit operations, Andy Whitcroft
 */
#ifndef PAGEBLOCK_FLAGS_H
#define PAGEBLOCK_FLAGS_H

#include <linux/types.h>

#define PB_migratetype_bits 3
/* Bit indices that affect a whole block of pages */
enum pageblock_bits {
	PB_migrate,
	PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
			/* 3 bits required for migrate types */
	PB_migrate_skip,/* If set the block is skipped by compaction */

	/*
	 * Assume the bits will always align on a word. If this assumption
	 * changes then get/set pageblock needs updating.
	 */
	NR_PAGEBLOCK_BITS
};

#if defined(CONFIG_HUGETLB_PAGE)

#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE

/* Huge page sizes are variable */
extern unsigned int pageblock_order;

#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

/*
 * Huge pages are a constant size, but don't exceed the maximum allocation
 * granularity.
 */
#define pageblock_order		MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER)

#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)

#define pageblock_order		MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER)

#else /* CONFIG_TRANSPARENT_HUGEPAGE */

/* If huge pages are not used, group by PAGE_BLOCK_ORDER */
#define pageblock_order		PAGE_BLOCK_ORDER

#endif /* CONFIG_HUGETLB_PAGE */

#define pageblock_nr_pages	(1UL << pageblock_order)
#define pageblock_align(pfn)	ALIGN((pfn), pageblock_nr_pages)
#define pageblock_aligned(pfn)	IS_ALIGNED((pfn), pageblock_nr_pages)
#define pageblock_start_pfn(pfn)	ALIGN_DOWN((pfn), pageblock_nr_pages)
#define pageblock_end_pfn(pfn)		ALIGN((pfn) + 1, pageblock_nr_pages)

/* Forward declaration */
struct page;

unsigned long get_pfnblock_flags_mask(const struct page *page,
				unsigned long pfn,
				unsigned long mask);

void set_pfnblock_flags_mask(struct page *page,
				unsigned long flags,
				unsigned long pfn,
				unsigned long mask);

/* Declarations for getting and setting flags. See mm/page_alloc.c */
#ifdef CONFIG_COMPACTION
#define get_pageblock_skip(page) \
	get_pfnblock_flags_mask(page, page_to_pfn(page),	\
			(1 << (PB_migrate_skip)))
#define clear_pageblock_skip(page) \
	set_pfnblock_flags_mask(page, 0, page_to_pfn(page),	\
			(1 << PB_migrate_skip))
#define set_pageblock_skip(page) \
	set_pfnblock_flags_mask(page, (1 << PB_migrate_skip),	\
			page_to_pfn(page),			\
			(1 << PB_migrate_skip))
#else
static inline bool get_pageblock_skip(struct page *page)
{
	return false;
}
static inline void clear_pageblock_skip(struct page *page)
{
}
static inline void set_pageblock_skip(struct page *page)
{
}
#endif /* CONFIG_COMPACTION */

#endif /* PAGEBLOCK_FLAGS_H */