	Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
 "146 patches.

  Subsystems affected by this patch series: kthread, ia64, scripts, ntfs,
  squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak, dax, kasan,
  debug, pagecache, gup, shmem, frontswap, memremap, memcg, selftests,
  pagemap, dma, vmalloc, memory-failure, hugetlb, userfaultfd, vmscan,
  mempolicy, oom-kill, hugetlbfs, migration, thp, ksm, page-poison,
  percpu, rmap, zswap, zram, cleanups, hmm, and damon)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (146 commits)
  mm/damon: hide kernel pointer from tracepoint event
  mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log
  mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging
  mm/damon/dbgfs: remove an unnecessary variable
  mm/damon: move the implementation of damon_insert_region to damon.h
  mm/damon: add access checking for hugetlb pages
  Docs/admin-guide/mm/damon/usage: update for schemes statistics
  mm/damon/dbgfs: support all DAMOS stats
  Docs/admin-guide/mm/damon/reclaim: document statistics parameters
  mm/damon/reclaim: provide reclamation statistics
  mm/damon/schemes: account how many times quota limit has exceeded
  mm/damon/schemes: account scheme actions that successfully applied
  mm/damon: remove a mistakenly added comment for a future feature
  Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts
  Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning
  Docs/admin-guide/mm/damon/usage: remove redundant information
  Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks
  mm/damon: convert macro functions to static inline functions
  mm/damon: modify damon_rand() macro to static inline function
  mm/damon: move damon_rand() definition into damon.h
  ...
commit f56caedaf9

211 changed files with 3825 additions and 1604 deletions
		|  | @ -29,12 +29,14 @@ Brief summary of control files:: | ||||||
|  hugetlb.<hugepagesize>.max_usage_in_bytes             # show max "hugepagesize" hugetlb  usage recorded |  hugetlb.<hugepagesize>.max_usage_in_bytes             # show max "hugepagesize" hugetlb  usage recorded | ||||||
|  hugetlb.<hugepagesize>.usage_in_bytes                 # show current usage for "hugepagesize" hugetlb |  hugetlb.<hugepagesize>.usage_in_bytes                 # show current usage for "hugepagesize" hugetlb | ||||||
|  hugetlb.<hugepagesize>.failcnt                        # show the number of allocation failure due to HugeTLB usage limit |  hugetlb.<hugepagesize>.failcnt                        # show the number of allocation failure due to HugeTLB usage limit | ||||||
|  |  hugetlb.<hugepagesize>.numa_stat                      # show the numa information of the hugetlb memory charged to this cgroup | ||||||
| 
 | 
 | ||||||
| For a system supporting three hugepage sizes (64k, 32M and 1G), the control | For a system supporting three hugepage sizes (64k, 32M and 1G), the control | ||||||
| files include:: | files include:: | ||||||
| 
 | 
 | ||||||
|   hugetlb.1GB.limit_in_bytes |   hugetlb.1GB.limit_in_bytes | ||||||
|   hugetlb.1GB.max_usage_in_bytes |   hugetlb.1GB.max_usage_in_bytes | ||||||
|  |   hugetlb.1GB.numa_stat | ||||||
|   hugetlb.1GB.usage_in_bytes |   hugetlb.1GB.usage_in_bytes | ||||||
|   hugetlb.1GB.failcnt |   hugetlb.1GB.failcnt | ||||||
|   hugetlb.1GB.rsvd.limit_in_bytes |   hugetlb.1GB.rsvd.limit_in_bytes | ||||||
|  | @ -43,6 +45,7 @@ files include:: | ||||||
|   hugetlb.1GB.rsvd.failcnt |   hugetlb.1GB.rsvd.failcnt | ||||||
|   hugetlb.64KB.limit_in_bytes |   hugetlb.64KB.limit_in_bytes | ||||||
|   hugetlb.64KB.max_usage_in_bytes |   hugetlb.64KB.max_usage_in_bytes | ||||||
|  |   hugetlb.64KB.numa_stat | ||||||
|   hugetlb.64KB.usage_in_bytes |   hugetlb.64KB.usage_in_bytes | ||||||
|   hugetlb.64KB.failcnt |   hugetlb.64KB.failcnt | ||||||
|   hugetlb.64KB.rsvd.limit_in_bytes |   hugetlb.64KB.rsvd.limit_in_bytes | ||||||
|  | @ -51,6 +54,7 @@ files include:: | ||||||
|   hugetlb.64KB.rsvd.failcnt |   hugetlb.64KB.rsvd.failcnt | ||||||
|   hugetlb.32MB.limit_in_bytes |   hugetlb.32MB.limit_in_bytes | ||||||
|   hugetlb.32MB.max_usage_in_bytes |   hugetlb.32MB.max_usage_in_bytes | ||||||
|  |   hugetlb.32MB.numa_stat | ||||||
|   hugetlb.32MB.usage_in_bytes |   hugetlb.32MB.usage_in_bytes | ||||||
|   hugetlb.32MB.failcnt |   hugetlb.32MB.failcnt | ||||||
|   hugetlb.32MB.rsvd.limit_in_bytes |   hugetlb.32MB.rsvd.limit_in_bytes | ||||||
|  |  | ||||||
|  | @ -1268,6 +1268,9 @@ PAGE_SIZE multiple when read back. | ||||||
| 		The number of processes belonging to this cgroup | 		The number of processes belonging to this cgroup | ||||||
| 		killed by any kind of OOM killer. | 		killed by any kind of OOM killer. | ||||||
| 
 | 
 | ||||||
|  |           oom_group_kill | ||||||
|  |                 The number of times a group OOM has occurred. | ||||||
|  | 
 | ||||||
|   memory.events.local |   memory.events.local | ||||||
| 	Similar to memory.events but the fields in the file are local | 	Similar to memory.events but the fields in the file are local | ||||||
| 	to the cgroup i.e. not hierarchical. The file modified event | 	to the cgroup i.e. not hierarchical. The file modified event | ||||||
|  | @ -1311,6 +1314,9 @@ PAGE_SIZE multiple when read back. | ||||||
| 	  sock (npn) | 	  sock (npn) | ||||||
| 		Amount of memory used in network transmission buffers | 		Amount of memory used in network transmission buffers | ||||||
| 
 | 
 | ||||||
|  | 	  vmalloc (npn) | ||||||
|  | 		Amount of memory used for vmap backed memory. | ||||||
|  | 
 | ||||||
| 	  shmem | 	  shmem | ||||||
| 		Amount of cached filesystem data that is swap-backed, | 		Amount of cached filesystem data that is swap-backed, | ||||||
| 		such as tmpfs, shm segments, shared anonymous mmap()s | 		such as tmpfs, shm segments, shared anonymous mmap()s | ||||||
|  | @ -2260,6 +2266,11 @@ HugeTLB Interface Files | ||||||
| 	are local to the cgroup i.e. not hierarchical. The file modified event | 	are local to the cgroup i.e. not hierarchical. The file modified event | ||||||
| 	generated on this file reflects only the local events. | 	generated on this file reflects only the local events. | ||||||
| 
 | 
 | ||||||
|  |   hugetlb.<hugepagesize>.numa_stat | ||||||
|  | 	Similar to memory.numa_stat, it shows the numa information of the | ||||||
|  | 	hugetlb pages of <hugepagesize> in this cgroup.  Only actively in-use | ||||||
|  | 	hugetlb pages are included.  The per-node values are in bytes. | ||||||
|  | 
 | ||||||
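For a quick check of the new file, the per-node breakdown can be read directly from the cgroup directory.  A minimal sketch follows; the cgroup name and the ``total=``/``N<node>=`` output layout shown here are illustrative assumptions, not verified output::

    # cat /sys/fs/cgroup/mygroup/hugetlb.2MB.numa_stat
    total=6291456 N0=4194304 N1=2097152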
| Misc | Misc | ||||||
| ---- | ---- | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -208,6 +208,31 @@ PID of the DAMON thread. | ||||||
| If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.  Else, | If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.  Else, | ||||||
| -1. | -1. | ||||||
| 
 | 
 | ||||||
|  | nr_reclaim_tried_regions | ||||||
|  | ------------------------ | ||||||
|  | 
 | ||||||
|  | Number of memory regions that DAMON_RECLAIM has tried to reclaim. | ||||||
|  | 
 | ||||||
|  | bytes_reclaim_tried_regions | ||||||
|  | --------------------------- | ||||||
|  | 
 | ||||||
|  | Total bytes of memory regions that DAMON_RECLAIM has tried to reclaim. | ||||||
|  | 
 | ||||||
|  | nr_reclaimed_regions | ||||||
|  | -------------------- | ||||||
|  | 
 | ||||||
|  | Number of memory regions that DAMON_RECLAIM has successfully reclaimed. | ||||||
|  | 
 | ||||||
|  | bytes_reclaimed_regions | ||||||
|  | ----------------------- | ||||||
|  | 
 | ||||||
|  | Total bytes of memory regions that DAMON_RECLAIM has successfully reclaimed. | ||||||
|  | 
 | ||||||
|  | nr_quota_exceeds | ||||||
|  | ---------------- | ||||||
|  | 
 | ||||||
|  | Number of times that the time/space quota limits have been exceeded. | ||||||
|  | 
 | ||||||
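Since DAMON_RECLAIM exposes these statistics as module parameters, one way to watch them is to read the parameter files under sysfs.  A minimal sketch, assuming the usual module-parameter layout; the values are illustrative::

    # cd /sys/module/damon_reclaim/parameters
    # cat nr_reclaim_tried_regions nr_reclaimed_regions nr_quota_exceeds
    674
    652
    3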
| Example | Example | ||||||
| ======= | ======= | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -7,37 +7,40 @@ Detailed Usages | ||||||
| DAMON provides below three interfaces for different users. | DAMON provides below three interfaces for different users. | ||||||
| 
 | 
 | ||||||
| - *DAMON user space tool.* | - *DAMON user space tool.* | ||||||
|   This is for privileged people such as system administrators who want a |   `This <https://github.com/awslabs/damo>`_ is for privileged people such as | ||||||
|   just-working human-friendly interface.  Using this, users can use the DAMON’s |   system administrators who want a just-working human-friendly interface. | ||||||
|   major features in a human-friendly way.  It may not be highly tuned for |   Using this, users can use DAMON’s major features in a human-friendly way. | ||||||
|   special cases, though.  It supports both virtual and physical address spaces |   It may not be highly tuned for special cases, though.  It supports both | ||||||
|   monitoring. |   virtual and physical address spaces monitoring.  For more detail, please | ||||||
|  |   refer to its `usage document | ||||||
|  |   <https://github.com/awslabs/damo/blob/next/USAGE.md>`_. | ||||||
| - *debugfs interface.* | - *debugfs interface.* | ||||||
|   This is for privileged user space programmers who want more optimized use of |   :ref:`This <debugfs_interface>` is for privileged user space programmers who | ||||||
|   DAMON.  Using this, users can use DAMON’s major features by reading |   want more optimized use of DAMON.  Using this, users can use DAMON’s major | ||||||
|   from and writing to special debugfs files.  Therefore, you can write and use |   features by reading from and writing to special debugfs files.  Therefore, | ||||||
|   your personalized DAMON debugfs wrapper programs that reads/writes the |   you can write and use your personalized DAMON debugfs wrapper programs that | ||||||
|   debugfs files instead of you.  The DAMON user space tool is also a reference |   reads/writes the debugfs files instead of you.  The `DAMON user space tool | ||||||
|   implementation of such programs.  It supports both virtual and physical |   <https://github.com/awslabs/damo>`_ is one example of such programs.  It | ||||||
|   address spaces monitoring. |   supports both virtual and physical address spaces monitoring.  Note that this | ||||||
|  |   interface provides only simple :ref:`statistics <damos_stats>` for the | ||||||
|  |   monitoring results.  For detailed monitoring results, DAMON provides a | ||||||
|  |   :ref:`tracepoint <tracepoint>`. | ||||||
| - *Kernel Space Programming Interface.* | - *Kernel Space Programming Interface.* | ||||||
|   This is for kernel space programmers.  Using this, users can utilize every |   :doc:`This </vm/damon/api>` is for kernel space programmers.  Using this, | ||||||
|   feature of DAMON most flexibly and efficiently by writing kernel space |   users can utilize every feature of DAMON most flexibly and efficiently by | ||||||
|   DAMON application programs for you.  You can even extend DAMON for various |   writing kernel space DAMON application programs for you.  You can even extend | ||||||
|   address spaces. |   DAMON for various address spaces.  For detail, please refer to the interface | ||||||
|  |   :doc:`document </vm/damon/api>`. | ||||||
| 
 | 
 | ||||||
| Nevertheless, you could write your own user space tool using the debugfs | 
 | ||||||
| interface.  A reference implementation is available at | .. _debugfs_interface: | ||||||
| https://github.com/awslabs/damo.  If you are a kernel programmer, you could |  | ||||||
| refer to :doc:`/vm/damon/api` for the kernel space programming interface.  For |  | ||||||
| the reason, this document describes only the debugfs interface |  | ||||||
| 
 | 
 | ||||||
| debugfs Interface | debugfs Interface | ||||||
| ================= | ================= | ||||||
| 
 | 
 | ||||||
| DAMON exports five files, ``attrs``, ``target_ids``, ``init_regions``, | DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``, | ||||||
| ``schemes`` and ``monitor_on`` under its debugfs directory, | ``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and | ||||||
| ``<debugfs>/damon/``. | ``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| Attributes | Attributes | ||||||
|  | @ -131,24 +134,38 @@ Schemes | ||||||
| 
 | 
 | ||||||
| For usual DAMON-based data access aware memory management optimizations, users | For usual DAMON-based data access aware memory management optimizations, users | ||||||
| would simply want the system to apply a memory management action to a memory | would simply want the system to apply a memory management action to a memory | ||||||
| region of a specific size having a specific access frequency for a specific | region of a specific access pattern.  DAMON receives such formalized operation | ||||||
| time.  DAMON receives such formalized operation schemes from the user and | schemes from the user and applies those to the target processes. | ||||||
| applies those to the target processes.  It also counts the total number and |  | ||||||
| size of regions that each scheme is applied.  This statistics can be used for |  | ||||||
| online analysis or tuning of the schemes. |  | ||||||
| 
 | 
 | ||||||
| Users can get and set the schemes by reading from and writing to ``schemes`` | Users can get and set the schemes by reading from and writing to ``schemes`` | ||||||
| debugfs file.  Reading the file also shows the statistics of each scheme.  To | debugfs file.  Reading the file also shows the statistics of each scheme.  To | ||||||
| the file, each of the schemes should be represented in each line in below form: | the file, each of the schemes should be represented in each line in below | ||||||
|  | form:: | ||||||
| 
 | 
 | ||||||
|     min-size max-size min-acc max-acc min-age max-age action |     <target access pattern> <action> <quota> <watermarks> | ||||||
| 
 | 
 | ||||||
| Note that the ranges are closed interval.  Bytes for the size of regions | You can disable schemes by simply writing an empty string to the file. | ||||||
| (``min-size`` and ``max-size``), number of monitored accesses per aggregate | 
 | ||||||
| interval for access frequency (``min-acc`` and ``max-acc``), number of | Target Access Pattern | ||||||
| aggregate intervals for the age of regions (``min-age`` and ``max-age``), and a | ~~~~~~~~~~~~~~~~~~~~~ | ||||||
| predefined integer for memory management actions should be used.  The supported | 
 | ||||||
| numbers and their meanings are as below. | The ``<target access pattern>`` is constructed with three ranges in below | ||||||
|  | form:: | ||||||
|  | 
 | ||||||
|  |     min-size max-size min-acc max-acc min-age max-age | ||||||
|  | 
 | ||||||
|  | Specifically, bytes for the size of regions (``min-size`` and ``max-size``), | ||||||
|  | number of monitored accesses per aggregate interval for access frequency | ||||||
|  | (``min-acc`` and ``max-acc``), number of aggregate intervals for the age of | ||||||
|  | regions (``min-age`` and ``max-age``) are specified.  Note that the ranges are | ||||||
|  | closed intervals. | ||||||
|  | 
 | ||||||
|  | Action | ||||||
|  | ~~~~~~ | ||||||
|  | 
 | ||||||
|  | The ``<action>`` is a predefined integer for memory management actions, which | ||||||
|  | DAMON will apply to the regions having the target access pattern.  The | ||||||
|  | supported numbers and their meanings are as below. | ||||||
| 
 | 
 | ||||||
|  - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED`` |  - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED`` | ||||||
|  - 1: Call ``madvise()`` for the region with ``MADV_COLD`` |  - 1: Call ``madvise()`` for the region with ``MADV_COLD`` | ||||||
|  | @ -157,20 +174,82 @@ numbers and their meanings are as below. | ||||||
|  - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE`` |  - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE`` | ||||||
|  - 5: Do nothing but count the statistics |  - 5: Do nothing but count the statistics | ||||||
| 
 | 
 | ||||||
| You can disable schemes by simply writing an empty string to the file.  For | Quota | ||||||
| example, below commands applies a scheme saying "If a memory region of size in | ~~~~~ | ||||||
| [4KiB, 8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate | 
 | ||||||
| interval in [10, 20], page out the region", check the entered scheme again, and | The optimal ``target access pattern`` for each ``action`` is workload dependent and | ||||||
| finally remove the scheme. :: | not easy to find.  Worse yet, setting a scheme's action too aggressively | ||||||
|  | can cause severe overhead.  To avoid such overhead, users can limit the time and | ||||||
|  | size quota for the scheme via the ``<quota>`` in below form:: | ||||||
|  | 
 | ||||||
|  |     <ms> <sz> <reset interval> <priority weights> | ||||||
|  | 
 | ||||||
|  | This makes DAMON try to use only up to ``<ms>`` milliseconds for applying | ||||||
|  | the action to memory regions of the ``target access pattern`` within the | ||||||
|  | ``<reset interval>`` milliseconds, and to apply the action to only up to | ||||||
|  | ``<sz>`` bytes of memory regions within the ``<reset interval>``.  Setting both | ||||||
|  | ``<ms>`` and ``<sz>`` zero disables the quota limits. | ||||||
|  | 
 | ||||||
|  | When the quota limit is expected to be exceeded, DAMON prioritizes found memory | ||||||
|  | regions of the ``target access pattern`` based on their size, access frequency, | ||||||
|  | and age.  For personalized prioritization, users can set the weights for the | ||||||
|  | three properties in ``<priority weights>`` in below form:: | ||||||
|  | 
 | ||||||
|  |     <size weight> <access frequency weight> <age weight> | ||||||
|  | 
 | ||||||
|  | Watermarks | ||||||
|  | ~~~~~~~~~~ | ||||||
|  | 
 | ||||||
|  | Some schemes would need to run based on the current value of a specific system | ||||||
|  | metric, such as the free memory ratio.  For such cases, users can specify | ||||||
|  | watermarks for the condition:: | ||||||
|  | 
 | ||||||
|  |     <metric> <check interval> <high mark> <middle mark> <low mark> | ||||||
|  | 
 | ||||||
|  | ``<metric>`` is a predefined integer for the metric to be checked.  The | ||||||
|  | supported numbers and their meanings are as below. | ||||||
|  | 
 | ||||||
|  |  - 0: Ignore the watermarks | ||||||
|  |  - 1: System's free memory rate (per thousand) | ||||||
|  | 
 | ||||||
|  | The value of the metric is checked every ``<check interval>`` microseconds. | ||||||
|  | 
 | ||||||
|  | If the value is higher than ``<high mark>`` or lower than ``<low mark>``, the | ||||||
|  | scheme is deactivated.  If the value is lower than ``<middle mark>``, the scheme | ||||||
|  | is activated. | ||||||
|  | 
 | ||||||
|  | .. _damos_stats: | ||||||
|  | 
 | ||||||
|  | Statistics | ||||||
|  | ~~~~~~~~~~ | ||||||
|  | 
 | ||||||
|  | DAMON counts the total number and bytes of the regions that each scheme has | ||||||
|  | tried to be applied to, the same two numbers for the regions that each scheme | ||||||
|  | has successfully been applied to, and the total number of times the quota | ||||||
|  | limits have been exceeded.  These statistics can be used for online analysis | ||||||
|  | or tuning of the schemes. | ||||||
|  | 
 | ||||||
|  | The statistics can be shown by reading the ``schemes`` file.  Reading the file | ||||||
|  | will show each scheme you entered in each line, and the five numbers for the | ||||||
|  | statistics will be added at the end of each line. | ||||||
|  | 
 | ||||||
|  | Example | ||||||
|  | ~~~~~~~ | ||||||
|  | 
 | ||||||
|  | The below commands apply a scheme saying "If a memory region of size in [4KiB, | ||||||
|  | 8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate | ||||||
|  | interval in [10, 20], page out the region.  For the paging out, use only up to | ||||||
|  | 10ms per second, and also don't page out more than 1GiB per second.  Under the | ||||||
|  | limitation, page out memory regions having longer age first.  Also, check the | ||||||
|  | free memory rate of the system every 5 seconds, start the monitoring and paging | ||||||
|  | out when the free memory rate becomes lower than 50%, but stop it if the free | ||||||
|  | memory rate becomes larger than 60%, or lower than 30%". :: | ||||||
| 
 | 
 | ||||||
|     # cd <debugfs>/damon |     # cd <debugfs>/damon | ||||||
|     # echo "4096 8192    0 5    10 20    2" > schemes |     # scheme="4096 8192  0 5    10 20    2"  # target access pattern and action | ||||||
|     # cat schemes |     # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas | ||||||
|     4096 8192 0 5 10 20 2 0 0 |     # scheme+=" 0 0 100"                     # prioritization weights | ||||||
|     # echo > schemes |     # scheme+=" 1 5000000 600 500 300"       # watermarks | ||||||
| 
 |     # echo "$scheme" > schemes | ||||||
| The last two integers in the 4th line of above example is the total number and |  | ||||||
| the total size of the regions that the scheme is applied. |  | ||||||
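Reading the file back should show the scheme together with the statistics described above appended at the end of the line.  A sketch of what that could look like; the trailing five numbers are illustrative only::

    # cat schemes
    4096 8192 0 5 10 20 2 10 1073741824 1000 0 0 100 1 5000000 600 500 300 1503 6295552 1476 6184960 0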
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| Turning On/Off | Turning On/Off | ||||||
|  | @ -195,6 +274,54 @@ the monitoring is turned on.  If you write to the files while DAMON is running, | ||||||
| an error code such as ``-EBUSY`` will be returned. | an error code such as ``-EBUSY`` will be returned. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | Monitoring Thread PID | ||||||
|  | --------------------- | ||||||
|  | 
 | ||||||
|  | DAMON does requested monitoring with a kernel thread called ``kdamond``.  You | ||||||
|  | can get the pid of the thread by reading the ``kdamond_pid`` file.  When the | ||||||
|  | monitoring is turned off, reading the file returns ``none``. :: | ||||||
|  | 
 | ||||||
|  |     # cd <debugfs>/damon | ||||||
|  |     # cat monitor_on | ||||||
|  |     off | ||||||
|  |     # cat kdamond_pid | ||||||
|  |     none | ||||||
|  |     # echo on > monitor_on | ||||||
|  |     # cat kdamond_pid | ||||||
|  |     18594 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | Using Multiple Monitoring Threads | ||||||
|  | --------------------------------- | ||||||
|  | 
 | ||||||
|  | One ``kdamond`` thread is created for each monitoring context.  For use cases | ||||||
|  | that require multiple ``kdamond`` threads, you can create and remove monitoring | ||||||
|  | contexts using the ``mk_contexts`` and ``rm_contexts`` files. | ||||||
|  | 
 | ||||||
|  | Writing the name of the new context to the ``mk_contexts`` file creates a | ||||||
|  | directory of the name on the DAMON debugfs directory.  The directory will have | ||||||
|  | DAMON debugfs files for the context. :: | ||||||
|  | 
 | ||||||
|  |     # cd <debugfs>/damon | ||||||
|  |     # ls foo | ||||||
|  |     # ls: cannot access 'foo': No such file or directory | ||||||
|  |     # echo foo > mk_contexts | ||||||
|  |     # ls foo | ||||||
|  |     # attrs  init_regions  kdamond_pid  schemes  target_ids | ||||||
|  | 
 | ||||||
|  | If the context is not needed anymore, you can remove it and the corresponding | ||||||
|  | directory by writing the name of the context to the ``rm_contexts`` file. :: | ||||||
|  | 
 | ||||||
|  |     # echo foo > rm_contexts | ||||||
|  |     # ls foo | ||||||
|  |     # ls: cannot access 'foo': No such file or directory | ||||||
|  | 
 | ||||||
|  | Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files are in the | ||||||
|  | root directory only. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _tracepoint: | ||||||
|  | 
 | ||||||
| Tracepoint for Monitoring Results | Tracepoint for Monitoring Results | ||||||
| ================================= | ================================= | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -408,7 +408,7 @@ follows: | ||||||
| Memory Policy APIs | Memory Policy APIs | ||||||
| ================== | ================== | ||||||
| 
 | 
 | ||||||
| Linux supports 3 system calls for controlling memory policy.  These APIS | Linux supports 4 system calls for controlling memory policy.  These APIs | ||||||
| always affect only the calling task, the calling task's address space, or | always affect only the calling task, the calling task's address space, or | ||||||
| some shared object mapped into the calling task's address space. | some shared object mapped into the calling task's address space. | ||||||
| 
 | 
 | ||||||
|  | @ -460,6 +460,20 @@ requested via the 'flags' argument. | ||||||
| 
 | 
 | ||||||
| See the mbind(2) man page for more details. | See the mbind(2) man page for more details. | ||||||
| 
 | 
 | ||||||
|  | Set home node for a Range of Task's Address Space:: | ||||||
|  | 
 | ||||||
|  | 	long sys_set_mempolicy_home_node(unsigned long start, unsigned long len, | ||||||
|  | 					 unsigned long home_node, | ||||||
|  | 					 unsigned long flags); | ||||||
|  | 
 | ||||||
|  | sys_set_mempolicy_home_node() sets the home node for a VMA policy present in | ||||||
|  | the task's address range.  The system call updates the home node only for the | ||||||
|  | existing mempolicy range; other address ranges are ignored.  The home node is | ||||||
|  | the NUMA node from which, or closest to which, page allocations will be made. | ||||||
|  | Specifying the home node overrides the default allocation policy of allocating | ||||||
|  | memory close to the local node of the executing CPU. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| Memory Policy Command Line Interface | Memory Policy Command Line Interface | ||||||
| ==================================== | ==================================== | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -948,7 +948,7 @@ how much memory needs to be free before kswapd goes back to sleep. | ||||||
| 
 | 
 | ||||||
| The unit is in fractions of 10,000. The default value of 10 means the | The unit is in fractions of 10,000. The default value of 10 means the | ||||||
| distances between watermarks are 0.1% of the available memory in the | distances between watermarks are 0.1% of the available memory in the | ||||||
| node/system. The maximum value is 1000, or 10% of memory. | node/system. The maximum value is 3000, or 30% of memory. | ||||||
| 
 | 
 | ||||||
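For example, the knob can be inspected and adjusted at runtime with ``sysctl``; the value ``200`` below (2% of the available memory in the node) is chosen purely for illustration::

    # sysctl vm.watermark_scale_factor
    vm.watermark_scale_factor = 10
    # sysctl -w vm.watermark_scale_factor=200
    vm.watermark_scale_factor = 200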
| A high rate of threads entering direct reclaim (allocstall) or kswapd | A high rate of threads entering direct reclaim (allocstall) or kswapd | ||||||
| going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate | going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate | ||||||
|  |  | ||||||
|  | @ -426,12 +426,14 @@ with the memory region, as the case would be with BSS (uninitialized data). | ||||||
| The "pathname" shows the name associated file for this mapping.  If the mapping | The "pathname" shows the name associated file for this mapping.  If the mapping | ||||||
| is not associated with a file: | is not associated with a file: | ||||||
| 
 | 
 | ||||||
|  =======                    ==================================== |  =============              ==================================== | ||||||
|  [heap]                     the heap of the program |  [heap]                     the heap of the program | ||||||
|  [stack]                    the stack of the main process |  [stack]                    the stack of the main process | ||||||
|  [vdso]                     the "virtual dynamic shared object", |  [vdso]                     the "virtual dynamic shared object", | ||||||
|                             the kernel system call handler |                             the kernel system call handler | ||||||
|  =======                    ==================================== |  [anon:<name>]              an anonymous mapping that has been | ||||||
|  |                             named by userspace | ||||||
|  |  =============              ==================================== | ||||||
| 
 | 
 | ||||||
|  or if empty, the mapping is anonymous. |  or if empty, the mapping is anonymous. | ||||||
| 
 | 
 | ||||||
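A named anonymous mapping then shows up in the pathname column of the maps file.  A sketch of what such a line could look like; the PID, address range, and name are made up for illustration, and the mapping is assumed to have already been named from userspace::

    # grep 'anon:' /proc/1234/maps
    7f3b8c000000-7f3b8c200000 rw-p 00000000 00:00 0          [anon:my_arena]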
|  |  | ||||||
|  | @ -66,9 +66,11 @@ PTE Page Table Helpers | ||||||
| +---------------------------+--------------------------------------------------+ | +---------------------------+--------------------------------------------------+ | ||||||
| | pte_mknotpresent          | Invalidates a mapped PTE                         | | | pte_mknotpresent          | Invalidates a mapped PTE                         | | ||||||
| +---------------------------+--------------------------------------------------+ | +---------------------------+--------------------------------------------------+ | ||||||
| | ptep_get_and_clear        | Clears a PTE                                     | | | ptep_clear                | Clears a PTE                                     | | ||||||
| +---------------------------+--------------------------------------------------+ | +---------------------------+--------------------------------------------------+ | ||||||
| | ptep_get_and_clear_full   | Clears a PTE                                     | | | ptep_get_and_clear        | Clears and returns PTE                           | | ||||||
|  | +---------------------------+--------------------------------------------------+ | ||||||
|  | | ptep_get_and_clear_full   | Clears and returns PTE (batched PTE unmap)       | | ||||||
| +---------------------------+--------------------------------------------------+ | +---------------------------+--------------------------------------------------+ | ||||||
| | ptep_test_and_clear_young | Clears young from a PTE                          | | | ptep_test_and_clear_young | Clears young from a PTE                          | | ||||||
| +---------------------------+--------------------------------------------------+ | +---------------------------+--------------------------------------------------+ | ||||||
|  | @ -247,12 +249,12 @@ SWAP Page Table Helpers | ||||||
| | __swp_to_pmd_entry        | Creates a mapped PMD from a swapped entry (arch) | | | __swp_to_pmd_entry        | Creates a mapped PMD from a swapped entry (arch) | | ||||||
| +---------------------------+--------------------------------------------------+ | +---------------------------+--------------------------------------------------+ | ||||||
| | is_migration_entry        | Tests a migration (read or write) swapped entry  | | | is_migration_entry        | Tests a migration (read or write) swapped entry  | | ||||||
| +---------------------------+--------------------------------------------------+ | +-------------------------------+----------------------------------------------+ | ||||||
| | is_write_migration_entry  | Tests a write migration swapped entry            | | | is_writable_migration_entry   | Tests a write migration swapped entry        | | ||||||
| +---------------------------+--------------------------------------------------+ | +-------------------------------+----------------------------------------------+ | ||||||
| | make_migration_entry_read | Converts into read migration swapped entry       | | | make_readable_migration_entry | Creates a read migration swapped entry       | | ||||||
| +---------------------------+--------------------------------------------------+ | +-------------------------------+----------------------------------------------+ | ||||||
| | make_migration_entry      | Creates a migration swapped entry (read or write)| | | make_writable_migration_entry | Creates a write migration swapped entry      | | ||||||
| +---------------------------+--------------------------------------------------+ | +-------------------------------+----------------------------------------------+ | ||||||
| 
 | 
 | ||||||
| [1] https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/ | [1] https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/ | ||||||
|  |  | ||||||
|  | @ -31,10 +31,12 @@ algorithms.  If you are looking for advice on simply allocating memory, see the | ||||||
|    page_migration |    page_migration | ||||||
|    page_frags |    page_frags | ||||||
|    page_owner |    page_owner | ||||||
|  |    page_table_check | ||||||
|    remap_file_pages |    remap_file_pages | ||||||
|    slub |    slub | ||||||
|    split_page_table_lock |    split_page_table_lock | ||||||
|    transhuge |    transhuge | ||||||
|    unevictable-lru |    unevictable-lru | ||||||
|  |    vmalloced-kernel-stacks | ||||||
|    z3fold |    z3fold | ||||||
|    zsmalloc |    zsmalloc | ||||||
|  |  | ||||||
|  | @ -263,15 +263,15 @@ Monitoring Migration | ||||||
| The following events (counters) can be used to monitor page migration. | The following events (counters) can be used to monitor page migration. | ||||||
| 
 | 
 | ||||||
| 1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a | 1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a | ||||||
|    page was migrated. If the page was a non-THP page, then this counter is |    page was migrated. If the page was a non-THP and non-hugetlb page, then | ||||||
|    increased by one. If the page was a THP, then this counter is increased by |    this counter is increased by one. If the page was a THP or hugetlb, then | ||||||
|    the number of THP subpages. For example, migration of a single 2MB THP that |    this counter is increased by the number of THP or hugetlb subpages. | ||||||
|    has 4KB-size base pages (subpages) will cause this counter to increase by |    For example, migration of a single 2MB THP that has 4KB-size base pages | ||||||
|    512. |    (subpages) will cause this counter to increase by 512. | ||||||
| 
 | 
 | ||||||
| 2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for | 2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for | ||||||
|    PGMIGRATE_SUCCESS, above: this will be increased by the number of subpages, |    PGMIGRATE_SUCCESS, above: this will be increased by the number of subpages, | ||||||
|    if it was a THP. |    if it was a THP or hugetlb. | ||||||
| 
 | 
 | ||||||
| 3. THP_MIGRATION_SUCCESS: A THP was migrated without being split. | 3. THP_MIGRATION_SUCCESS: A THP was migrated without being split. | ||||||
| 
 | 
 | ||||||
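These counters are exported through ``/proc/vmstat``, so a snapshot can be taken with a one-liner such as the following; the counter values are illustrative::

    # grep -E 'pgmigrate|thp_migration' /proc/vmstat
    pgmigrate_success 52344
    pgmigrate_fail 17
    thp_migration_success 61
    thp_migration_fail 2
    thp_migration_split 1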
|  |  | ||||||
							
								
								
									
Documentation/vm/page_table_check.rst (new file, 56 lines added)
							|  | @ -0,0 +1,56 @@ | ||||||
|  | .. SPDX-License-Identifier: GPL-2.0 | ||||||
|  | 
 | ||||||
|  | .. _page_table_check: | ||||||
|  | 
 | ||||||
|  | ================ | ||||||
|  | Page Table Check | ||||||
|  | ================ | ||||||
|  | 
 | ||||||
|  | Introduction | ||||||
|  | ============ | ||||||
|  | 
 | ||||||
|  | Page table check helps harden the kernel by ensuring that some types of | ||||||
|  | memory corruption are prevented. | ||||||
|  | 
 | ||||||
|  | Page table check performs extra verifications when new pages become accessible | ||||||
|  | from userspace, i.e. when their page table entries (PTEs, PMDs, etc.) are | ||||||
|  | added to the page table. | ||||||
|  | 
 | ||||||
|  | When corruption is detected, the kernel is crashed. There is a small | ||||||
|  | performance and memory overhead associated with the page table check. | ||||||
|  | Therefore, it is disabled by default, but can be optionally enabled on systems | ||||||
|  | where the extra hardening outweighs the performance costs. Also, because page | ||||||
|  | table check is synchronous, it can help with debugging double-map memory | ||||||
|  | corruption issues by crashing the kernel at the time the wrong mapping occurs | ||||||
|  | rather than later, which is often the case with memory corruption bugs. | ||||||
|  | 
 | ||||||
|  | Double mapping detection logic | ||||||
|  | ============================== | ||||||
|  | 
 | ||||||
|  | +-------------------+-------------------+-------------------+------------------+ | ||||||
|  | | Current Mapping   | New mapping       | Permissions       | Rule             | | ||||||
|  | +===================+===================+===================+==================+ | ||||||
|  | | Anonymous         | Anonymous         | Read              | Allow            | | ||||||
|  | +-------------------+-------------------+-------------------+------------------+ | ||||||
|  | | Anonymous         | Anonymous         | Read / Write      | Prohibit         | | ||||||
|  | +-------------------+-------------------+-------------------+------------------+ | ||||||
|  | | Anonymous         | Named             | Any               | Prohibit         | | ||||||
|  | +-------------------+-------------------+-------------------+------------------+ | ||||||
|  | | Named             | Anonymous         | Any               | Prohibit         | | ||||||
|  | +-------------------+-------------------+-------------------+------------------+ | ||||||
|  | | Named             | Named             | Any               | Allow            | | ||||||
|  | +-------------------+-------------------+-------------------+------------------+ | ||||||
|  | 
 | ||||||
|  | Enabling Page Table Check | ||||||
|  | ========================= | ||||||
|  | 
 | ||||||
|  | Build kernel with: | ||||||
|  | 
 | ||||||
|  | - PAGE_TABLE_CHECK=y | ||||||
|  |   Note, it can only be enabled on platforms where ARCH_SUPPORTS_PAGE_TABLE_CHECK | ||||||
|  |   is available. | ||||||
|  | 
 | ||||||
|  | - Boot with 'page_table_check=on' kernel parameter. | ||||||
|  | 
 | ||||||
|  | Optionally, build the kernel with PAGE_TABLE_CHECK_ENFORCED in order to have | ||||||
|  | page table check support without the extra kernel parameter. | ||||||
							
								
								
									
Documentation/vm/vmalloced-kernel-stacks.rst (new file, 153 lines added)
							|  | @ -0,0 +1,153 @@ | ||||||
|  | .. SPDX-License-Identifier: GPL-2.0 | ||||||
|  | 
 | ||||||
|  | ===================================== | ||||||
|  | Virtually Mapped Kernel Stack Support | ||||||
|  | ===================================== | ||||||
|  | 
 | ||||||
|  | :Author: Shuah Khan <skhan@linuxfoundation.org> | ||||||
|  | 
 | ||||||
|  | .. contents:: :local: | ||||||
|  | 
 | ||||||
|  | Overview | ||||||
|  | -------- | ||||||
|  | 
 | ||||||
|  | This is a compilation of information from the code and original patch | ||||||
|  | series that introduced the `Virtually Mapped Kernel Stacks feature | ||||||
|  | <https://lwn.net/Articles/694348/>`_. | ||||||
|  | 
 | ||||||
|  | Introduction | ||||||
|  | ------------ | ||||||
|  | 
 | ||||||
|  | Kernel stack overflows are often hard to debug and make the kernel | ||||||
|  | susceptible to exploits. Problems could show up at a later time making | ||||||
|  | it difficult to isolate and root-cause. | ||||||
|  | 
 | ||||||
|  | Virtually mapped kernel stacks with guard pages cause kernel stack | ||||||
|  | overflows to be caught immediately rather than causing hard-to-diagnose | ||||||
|  | corruption. | ||||||
|  | 
 | ||||||
|  | HAVE_ARCH_VMAP_STACK and VMAP_STACK configuration options enable | ||||||
|  | support for virtually mapped stacks with guard pages. This feature | ||||||
|  | causes reliable faults when the stack overflows. The usability of | ||||||
|  | the stack trace after overflow and response to the overflow itself | ||||||
|  | is architecture dependent. | ||||||
|  | 
 | ||||||
|  | .. note:: | ||||||
|  |         As of this writing, arm64, powerpc, riscv, s390, um, and x86 have | ||||||
|  |         support for VMAP_STACK. | ||||||
|  | 
 | ||||||
|  | HAVE_ARCH_VMAP_STACK | ||||||
|  | -------------------- | ||||||
|  | 
 | ||||||
|  | Architectures that can support Virtually Mapped Kernel Stacks should | ||||||
|  | enable this bool configuration option. The requirements are: | ||||||
|  | 
 | ||||||
|  | - vmalloc space must be large enough to hold many kernel stacks. This | ||||||
|  |   may rule out many 32-bit architectures. | ||||||
|  | - Stacks in vmalloc space need to work reliably.  For example, if | ||||||
|  |   vmap page tables are created on demand, either this mechanism | ||||||
|  |   needs to work while the stack points to a virtual address with | ||||||
|  |   unpopulated page tables or arch code (switch_to() and switch_mm(), | ||||||
|  |   most likely) needs to ensure that the stack's page table entries | ||||||
|  |   are populated before running on a possibly unpopulated stack. | ||||||
|  | - If the stack overflows into a guard page, something reasonable | ||||||
|  |   should happen. The definition of "reasonable" is flexible, but | ||||||
|  |   instantly rebooting without logging anything would be unfriendly. | ||||||
|  | 
 | ||||||
|  | VMAP_STACK | ||||||
|  | ---------- | ||||||
|  | 
 | ||||||
|  | The VMAP_STACK bool configuration option, when enabled, allocates virtually | ||||||
|  | mapped task stacks. This option depends on HAVE_ARCH_VMAP_STACK. | ||||||
|  | 
 | ||||||
|  | - Enable this if you want to use virtually-mapped kernel stacks | ||||||
|  |   with guard pages. This causes kernel stack overflows to be caught | ||||||
|  |   immediately rather than causing difficult-to-diagnose corruption. | ||||||
|  | 
 | ||||||
|  | .. note:: | ||||||
|  | 
 | ||||||
|  |         Using this feature with KASAN requires architecture support | ||||||
|  |         for backing virtual mappings with real shadow memory, and | ||||||
|  |         KASAN_VMALLOC must be enabled. | ||||||
|  | 
 | ||||||
|  | .. note:: | ||||||
|  | 
 | ||||||
|  |         When VMAP_STACK is enabled, it is not possible to run DMA on | ||||||
|  |         stack-allocated data. | ||||||
|  | 
 | ||||||
|  | Kernel configuration options and dependencies keep changing. Refer to | ||||||
|  | the latest code base: | ||||||
|  | 
 | ||||||
|  | `Kconfig <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/Kconfig>`_ | ||||||
|  | 
 | ||||||
|  | Allocation | ||||||
|  | ----------- | ||||||
|  | 
 | ||||||
|  | When a new kernel thread is created, thread stack is allocated from | ||||||
|  | virtually contiguous memory pages from the page level allocator. These | ||||||
|  | pages are mapped into contiguous kernel virtual space with PAGE_KERNEL | ||||||
|  | protections. | ||||||
|  | 
 | ||||||
|  | alloc_thread_stack_node() calls __vmalloc_node_range() to allocate stack | ||||||
|  | with PAGE_KERNEL protections. | ||||||
|  | 
 | ||||||
|  | - Allocated stacks are cached and later reused by new threads, so memcg | ||||||
|  |   accounting is performed manually on assigning/releasing stacks to tasks. | ||||||
|  |   Hence, __vmalloc_node_range is called without __GFP_ACCOUNT. | ||||||
|  | - vm_struct is cached to be able to find when thread free is initiated | ||||||
|  |   in interrupt context. free_thread_stack() can be called in interrupt | ||||||
|  |   context. | ||||||
|  | - On arm64, all VMAP's stacks need to have the same alignment to ensure | ||||||
|  |   that VMAP'd stack overflow detection works correctly. Arch specific | ||||||
|  |   vmap stack allocator takes care of this detail. | ||||||
|  | - This does not address interrupt stacks (per the original patch series). | ||||||
|  | 
 | ||||||
|  | Thread stack allocation is initiated from clone(), fork(), vfork(), | ||||||
|  | kernel_thread() via kernel_clone(). Below are a few hints for searching | ||||||
|  | the code base to understand when and how the thread stack is allocated. | ||||||
|  | 
 | ||||||
|  | The bulk of the code is in: | ||||||
|  | `kernel/fork.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/fork.c>`_. | ||||||
|  | 
 | ||||||
|  | The stack_vm_area pointer in task_struct keeps track of the virtually allocated | ||||||
|  | stack, and a non-null stack_vm_area pointer serves as an indication that | ||||||
|  | virtually mapped kernel stacks are enabled. | ||||||
|  | 
 | ||||||
|  | :: | ||||||
|  | 
 | ||||||
|  |         struct vm_struct *stack_vm_area; | ||||||
|  | 
 | ||||||
|  | Stack overflow handling | ||||||
|  | ----------------------- | ||||||
|  | 
 | ||||||
|  | Leading and trailing guard pages help detect stack overflows. When the stack | ||||||
|  | overflows into the guard pages, handlers have to be careful not to overflow | ||||||
|  | the stack again. When handlers are called, it is likely that very little | ||||||
|  | stack space is left. | ||||||
|  | 
 | ||||||
|  | On x86, this is done by handling the page fault indicating the kernel | ||||||
|  | stack overflow on the double-fault stack. | ||||||
|  | 
 | ||||||
|  | Testing VMAP allocation with guard pages | ||||||
|  | ---------------------------------------- | ||||||
|  | 
 | ||||||
|  | How do we ensure that VMAP_STACK is actually allocating with a leading | ||||||
|  | and trailing guard page? The following lkdtm tests can help detect any | ||||||
|  | regressions. | ||||||
|  | 
 | ||||||
|  | :: | ||||||
|  | 
 | ||||||
|  |         void lkdtm_STACK_GUARD_PAGE_LEADING() | ||||||
|  |         void lkdtm_STACK_GUARD_PAGE_TRAILING() | ||||||
|  | 
 | ||||||
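These crash points can be triggered through lkdtm's debugfs interface, assuming the kernel was built with CONFIG_LKDTM and debugfs is mounted at the standard location.  Note that this intentionally crashes the kernel, so only do it on a disposable test machine::

    # echo STACK_GUARD_PAGE_LEADING > /sys/kernel/debug/provoke-crash/DIRECT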
|  | Conclusions | ||||||
|  | ----------- | ||||||
|  | 
 | ||||||
|  | - A percpu cache of vmalloced stacks appears to be a bit faster than a | ||||||
|  |   high-order stack allocation, at least when the cache hits. | ||||||
|  | - THREAD_INFO_IN_TASK gets rid of arch-specific thread_info entirely and | ||||||
|  |   simply embeds the thread_info (containing only flags) and 'int cpu' into | ||||||
|  |   task_struct. | ||||||
|  | - The thread stack can be freed as soon as the task is dead (without | ||||||
|  |   waiting for RCU) and then, if vmapped stacks are in use, the entire stack | ||||||
|  |   is cached for reuse on the same cpu. | ||||||
|  | @ -14541,6 +14541,15 @@ F:	include/net/page_pool.h | ||||||
| F:	include/trace/events/page_pool.h | F:	include/trace/events/page_pool.h | ||||||
| F:	net/core/page_pool.c | F:	net/core/page_pool.c | ||||||
| 
 | 
 | ||||||
|  | PAGE TABLE CHECK | ||||||
|  | M:	Pasha Tatashin <pasha.tatashin@soleen.com> | ||||||
|  | M:	Andrew Morton <akpm@linux-foundation.org> | ||||||
|  | L:	linux-mm@kvack.org | ||||||
|  | S:	Maintained | ||||||
|  | F:	Documentation/vm/page_table_check.rst | ||||||
|  | F:	include/linux/page_table_check.h | ||||||
|  | F:	mm/page_table_check.c | ||||||
|  | 
 | ||||||
| PANASONIC LAPTOP ACPI EXTRAS DRIVER | PANASONIC LAPTOP ACPI EXTRAS DRIVER | ||||||
| M:	Kenneth Chan <kenneth.t.chan@gmail.com> | M:	Kenneth Chan <kenneth.t.chan@gmail.com> | ||||||
| L:	platform-driver-x86@vger.kernel.org | L:	platform-driver-x86@vger.kernel.org | ||||||
|  |  | ||||||
|  | @ -1297,6 +1297,9 @@ config HAVE_ARCH_PFN_VALID | ||||||
| config ARCH_SUPPORTS_DEBUG_PAGEALLOC | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | ||||||
| 	bool | 	bool | ||||||
| 
 | 
 | ||||||
|  | config ARCH_SUPPORTS_PAGE_TABLE_CHECK | ||||||
|  | 	bool | ||||||
|  | 
 | ||||||
| config ARCH_SPLIT_ARG64 | config ARCH_SPLIT_ARG64 | ||||||
| 	bool | 	bool | ||||||
| 	help | 	help | ||||||
|  |  | ||||||
|  | @ -489,3 +489,4 @@ | ||||||
| # 557 reserved for memfd_secret | # 557 reserved for memfd_secret | ||||||
| 558	common	process_mrelease		sys_process_mrelease | 558	common	process_mrelease		sys_process_mrelease | ||||||
| 559	common  futex_waitv                     sys_futex_waitv | 559	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 560	common	set_mempolicy_home_node		sys_ni_syscall | ||||||
|  |  | ||||||
|  | @ -165,7 +165,6 @@ do_page_fault(unsigned long address, unsigned long mmcsr, | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -176,7 +175,6 @@ do_page_fault(unsigned long address, unsigned long mmcsr, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -149,8 +149,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Fault retry nuances, mmap_lock already relinquished by core mm | 	 * Fault retry nuances, mmap_lock already relinquished by core mm | ||||||
| 	 */ | 	 */ | ||||||
| 	if (unlikely((fault & VM_FAULT_RETRY) && | 	if (unlikely(fault & VM_FAULT_RETRY)) { | ||||||
| 		     (flags & FAULT_FLAG_ALLOW_RETRY))) { |  | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -322,7 +322,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | ||||||
| 		return 0; | 		return 0; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { | 	if (!(fault & VM_FAULT_ERROR)) { | ||||||
| 		if (fault & VM_FAULT_RETRY) { | 		if (fault & VM_FAULT_RETRY) { | ||||||
| 			flags |= FAULT_FLAG_TRIED; | 			flags |= FAULT_FLAG_TRIED; | ||||||
| 			goto retry; | 			goto retry; | ||||||
|  |  | ||||||
|  | @ -463,3 +463,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common	futex_waitv			sys_futex_waitv | 449	common	futex_waitv			sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -38,7 +38,7 @@ | ||||||
| #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5) | #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5) | ||||||
| #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800) | #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800) | ||||||
| 
 | 
 | ||||||
| #define __NR_compat_syscalls		450 | #define __NR_compat_syscalls		451 | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #define __ARCH_WANT_SYS_CLONE | #define __ARCH_WANT_SYS_CLONE | ||||||
|  |  | ||||||
|  | @ -905,6 +905,8 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) | ||||||
| __SYSCALL(__NR_process_mrelease, sys_process_mrelease) | __SYSCALL(__NR_process_mrelease, sys_process_mrelease) | ||||||
| #define __NR_futex_waitv 449 | #define __NR_futex_waitv 449 | ||||||
| __SYSCALL(__NR_futex_waitv, sys_futex_waitv) | __SYSCALL(__NR_futex_waitv, sys_futex_waitv) | ||||||
|  | #define __NR_set_mempolicy_home_node 450 | ||||||
|  | __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Please add new compat syscalls above this comment and update |  * Please add new compat syscalls above this comment and update | ||||||
|  |  | ||||||
|  | @ -36,7 +36,7 @@ void *module_alloc(unsigned long size) | ||||||
| 		module_alloc_end = MODULES_END; | 		module_alloc_end = MODULES_END; | ||||||
| 
 | 
 | ||||||
| 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, | 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, | ||||||
| 				module_alloc_end, gfp_mask, PAGE_KERNEL, 0, | 				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, | ||||||
| 				NUMA_NO_NODE, __builtin_return_address(0)); | 				NUMA_NO_NODE, __builtin_return_address(0)); | ||||||
| 
 | 
 | ||||||
| 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && | 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && | ||||||
|  | @ -58,7 +58,7 @@ void *module_alloc(unsigned long size) | ||||||
| 				PAGE_KERNEL, 0, NUMA_NO_NODE, | 				PAGE_KERNEL, 0, NUMA_NO_NODE, | ||||||
| 				__builtin_return_address(0)); | 				__builtin_return_address(0)); | ||||||
| 
 | 
 | ||||||
| 	if (p && (kasan_module_alloc(p, size) < 0)) { | 	if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { | ||||||
| 		vfree(p); | 		vfree(p); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -608,11 +608,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 		mm_flags |= FAULT_FLAG_TRIED; | 		mm_flags |= FAULT_FLAG_TRIED; | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
|  |  | ||||||
|  | @ -98,12 +98,10 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) | ||||||
| 
 | 
 | ||||||
| 	/* The most common case -- we are done. */ | 	/* The most common case -- we are done. */ | ||||||
| 	if (likely(!(fault & VM_FAULT_ERROR))) { | 	if (likely(!(fault & VM_FAULT_ERROR))) { | ||||||
| 		if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 		if (fault & VM_FAULT_RETRY) { | 		if (fault & VM_FAULT_RETRY) { | ||||||
| 			flags |= FAULT_FLAG_TRIED; | 			flags |= FAULT_FLAG_TRIED; | ||||||
| 			goto retry; | 			goto retry; | ||||||
| 		} | 		} | ||||||
| 		} |  | ||||||
| 
 | 
 | ||||||
| 		mmap_read_unlock(mm); | 		mmap_read_unlock(mm); | ||||||
| 		return; | 		return; | ||||||
|  |  | ||||||
|  | @ -848,7 +848,7 @@ register_unwind_table (struct module *mod) | ||||||
| { | { | ||||||
| 	struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; | 	struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; | ||||||
| 	struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); | 	struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); | ||||||
| 	struct unw_table_entry tmp, *e1, *e2, *core, *init; | 	struct unw_table_entry *e1, *e2, *core, *init; | ||||||
| 	unsigned long num_init = 0, num_core = 0; | 	unsigned long num_init = 0, num_core = 0; | ||||||
| 
 | 
 | ||||||
| 	/* First, count how many init and core unwind-table entries there are.  */ | 	/* First, count how many init and core unwind-table entries there are.  */ | ||||||
|  | @ -865,9 +865,7 @@ register_unwind_table (struct module *mod) | ||||||
| 	for (e1 = start; e1 < end; ++e1) { | 	for (e1 = start; e1 < end; ++e1) { | ||||||
| 		for (e2 = e1 + 1; e2 < end; ++e2) { | 		for (e2 = e1 + 1; e2 < end; ++e2) { | ||||||
| 			if (e2->start_offset < e1->start_offset) { | 			if (e2->start_offset < e1->start_offset) { | ||||||
| 				tmp = *e1; | 				swap(*e1, *e2); | ||||||
| 				*e1 = *e2; |  | ||||||
| 				*e2 = tmp; |  | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -208,10 +208,7 @@ sort_regions (struct rsvd_region *rsvd_region, int max) | ||||||
| 	while (max--) { | 	while (max--) { | ||||||
| 		for (j = 0; j < max; ++j) { | 		for (j = 0; j < max; ++j) { | ||||||
| 			if (rsvd_region[j].start > rsvd_region[j+1].start) { | 			if (rsvd_region[j].start > rsvd_region[j+1].start) { | ||||||
| 				struct rsvd_region tmp; | 				swap(rsvd_region[j], rsvd_region[j + 1]); | ||||||
| 				tmp = rsvd_region[j]; |  | ||||||
| 				rsvd_region[j] = rsvd_region[j + 1]; |  | ||||||
| 				rsvd_region[j + 1] = tmp; |  | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -370,3 +370,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -264,6 +264,7 @@ static struct attribute * cache_default_attrs[] = { | ||||||
| 	&shared_cpu_map.attr, | 	&shared_cpu_map.attr, | ||||||
| 	NULL | 	NULL | ||||||
| }; | }; | ||||||
|  | ATTRIBUTE_GROUPS(cache_default); | ||||||
| 
 | 
 | ||||||
| #define to_object(k) container_of(k, struct cache_info, kobj) | #define to_object(k) container_of(k, struct cache_info, kobj) | ||||||
| #define to_attr(a) container_of(a, struct cache_attr, attr) | #define to_attr(a) container_of(a, struct cache_attr, attr) | ||||||
|  | @ -284,7 +285,7 @@ static const struct sysfs_ops cache_sysfs_ops = { | ||||||
| 
 | 
 | ||||||
| static struct kobj_type cache_ktype = { | static struct kobj_type cache_ktype = { | ||||||
| 	.sysfs_ops	= &cache_sysfs_ops, | 	.sysfs_ops	= &cache_sysfs_ops, | ||||||
| 	.default_attrs	= cache_default_attrs, | 	.default_groups	= cache_default_groups, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct kobj_type cache_ktype_percpu_entry = { | static struct kobj_type cache_ktype_percpu_entry = { | ||||||
|  |  | ||||||
|  | @ -171,7 +171,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) | ||||||
|  * @n_pages: number of contiguous pages to allocate |  * @n_pages: number of contiguous pages to allocate | ||||||
|  * |  * | ||||||
|  * Allocate the specified number of contiguous uncached pages on the |  * Allocate the specified number of contiguous uncached pages on the | ||||||
|  * the requested node. If not enough contiguous uncached pages are available |  * requested node. If not enough contiguous uncached pages are available | ||||||
|  * on the requested node, roundrobin starting with the next higher node. |  * on the requested node, roundrobin starting with the next higher node. | ||||||
|  */ |  */ | ||||||
| unsigned long uncached_alloc_page(int starting_nid, int n_pages) | unsigned long uncached_alloc_page(int starting_nid, int n_pages) | ||||||
|  |  | ||||||
|  | @ -156,7 +156,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -167,7 +166,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -449,3 +449,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -153,7 +153,6 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -165,7 +164,6 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return 0; | 	return 0; | ||||||
|  |  | ||||||
|  | @ -455,3 +455,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -232,7 +232,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -244,7 +243,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -388,3 +388,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	n32	process_mrelease		sys_process_mrelease | 448	n32	process_mrelease		sys_process_mrelease | ||||||
| 449	n32	futex_waitv			sys_futex_waitv | 449	n32	futex_waitv			sys_futex_waitv | ||||||
|  | 450	n32	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -364,3 +364,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	n64	process_mrelease		sys_process_mrelease | 448	n64	process_mrelease		sys_process_mrelease | ||||||
| 449	n64	futex_waitv			sys_futex_waitv | 449	n64	futex_waitv			sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -437,3 +437,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	o32	process_mrelease		sys_process_mrelease | 448	o32	process_mrelease		sys_process_mrelease | ||||||
| 449	o32	futex_waitv			sys_futex_waitv | 449	o32	futex_waitv			sys_futex_waitv | ||||||
|  | 450	o32	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -171,7 +171,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, | ||||||
| 			goto do_sigbus; | 			goto do_sigbus; | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { | 
 | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -183,7 +183,6 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -230,7 +230,6 @@ void do_page_fault(unsigned long entry, unsigned long addr, | ||||||
| 			goto bad_area; | 			goto bad_area; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -240,7 +239,6 @@ void do_page_fault(unsigned long entry, unsigned long addr, | ||||||
| 		 */ | 		 */ | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -149,7 +149,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -161,7 +160,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -177,7 +177,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	/*RGD modeled on Cris */ | 	/*RGD modeled on Cris */ | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
|  | @ -189,7 +188,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -447,3 +447,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common	futex_waitv			sys_futex_waitv | 449	common	futex_waitv			sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -324,7 +324,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, | ||||||
| 			goto bad_area; | 			goto bad_area; | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * No need to mmap_read_unlock(mm) as we would | 		 * No need to mmap_read_unlock(mm) as we would | ||||||
|  | @ -334,7 +333,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -529,3 +529,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	nospu	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -517,11 +517,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, | ||||||
| 	 * case. | 	 * case. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (unlikely(fault & VM_FAULT_RETRY)) { | 	if (unlikely(fault & VM_FAULT_RETRY)) { | ||||||
| 		if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(current->mm); | 	mmap_read_unlock(current->mm); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -330,7 +330,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) | ||||||
| 	if (fault_signal_pending(fault, regs)) | 	if (fault_signal_pending(fault, regs)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { | 	if (unlikely(fault & VM_FAULT_RETRY)) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
| 		/*
 | 		/*
 | ||||||
|  |  | ||||||
|  | @ -37,14 +37,15 @@ | ||||||
| 
 | 
 | ||||||
| void *module_alloc(unsigned long size) | void *module_alloc(unsigned long size) | ||||||
| { | { | ||||||
|  | 	gfp_t gfp_mask = GFP_KERNEL; | ||||||
| 	void *p; | 	void *p; | ||||||
| 
 | 
 | ||||||
| 	if (PAGE_ALIGN(size) > MODULES_LEN) | 	if (PAGE_ALIGN(size) > MODULES_LEN) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, | 	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, | ||||||
| 				 GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, | 				 gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, | ||||||
| 				 __builtin_return_address(0)); | 				 __builtin_return_address(0)); | ||||||
| 	if (p && (kasan_module_alloc(p, size) < 0)) { | 	if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { | ||||||
| 		vfree(p); | 		vfree(p); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -452,3 +452,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease | 448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease | ||||||
| 449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv | 449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv | ||||||
|  | 450  common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -452,12 +452,13 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) | ||||||
| 	if (unlikely(fault & VM_FAULT_ERROR)) | 	if (unlikely(fault & VM_FAULT_ERROR)) | ||||||
| 		goto out_up; | 		goto out_up; | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		if (IS_ENABLED(CONFIG_PGSTE) && gmap && | 		if (IS_ENABLED(CONFIG_PGSTE) && gmap && | ||||||
| 			(flags & FAULT_FLAG_RETRY_NOWAIT)) { | 			(flags & FAULT_FLAG_RETRY_NOWAIT)) { | ||||||
| 				/* FAULT_FLAG_RETRY_NOWAIT has been set,
 | 			/*
 | ||||||
| 				 * mmap_lock has not been released */ | 			 * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has | ||||||
|  | 			 * not been released | ||||||
|  | 			 */ | ||||||
| 			current->thread.gmap_pfault = 1; | 			current->thread.gmap_pfault = 1; | ||||||
| 			fault = VM_FAULT_PFAULT; | 			fault = VM_FAULT_PFAULT; | ||||||
| 			goto out_up; | 			goto out_up; | ||||||
|  | @ -467,7 +468,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) | ||||||
| 		mmap_read_lock(mm); | 		mmap_read_lock(mm); | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 	if (IS_ENABLED(CONFIG_PGSTE) && gmap) { | 	if (IS_ENABLED(CONFIG_PGSTE) && gmap) { | ||||||
| 		address =  __gmap_link(gmap, current->thread.gmap_addr, | 		address =  __gmap_link(gmap, current->thread.gmap_addr, | ||||||
| 				       address); | 				       address); | ||||||
|  |  | ||||||
|  | @ -452,3 +452,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -485,7 +485,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | ||||||
| 		if (mm_fault_error(regs, error_code, address, fault)) | 		if (mm_fault_error(regs, error_code, address, fault)) | ||||||
| 			return; | 			return; | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -496,7 +495,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | ||||||
| 		 */ | 		 */ | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -495,3 +495,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -200,7 +200,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -211,7 +210,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -437,7 +437,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -448,7 +447,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 
 | 
 | ||||||
| 	mm_rss = get_mm_rss(mm); | 	mm_rss = get_mm_rss(mm); | ||||||
|  |  | ||||||
|  | @ -87,13 +87,11 @@ int handle_page_fault(unsigned long address, unsigned long ip, | ||||||
| 			} | 			} | ||||||
| 			BUG(); | 			BUG(); | ||||||
| 		} | 		} | ||||||
| 		if (flags & FAULT_FLAG_ALLOW_RETRY) { |  | ||||||
| 		if (fault & VM_FAULT_RETRY) { | 		if (fault & VM_FAULT_RETRY) { | ||||||
| 			flags |= FAULT_FLAG_TRIED; | 			flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
| 			goto retry; | 			goto retry; | ||||||
| 		} | 		} | ||||||
| 		} |  | ||||||
| 
 | 
 | ||||||
| 		pmd = pmd_off(mm, address); | 		pmd = pmd_off(mm, address); | ||||||
| 		pte = pte_offset_kernel(pmd, address); | 		pte = pte_offset_kernel(pmd, address); | ||||||
|  |  | ||||||
|  | @ -104,6 +104,7 @@ config X86 | ||||||
| 	select ARCH_SUPPORTS_ACPI | 	select ARCH_SUPPORTS_ACPI | ||||||
| 	select ARCH_SUPPORTS_ATOMIC_RMW | 	select ARCH_SUPPORTS_ATOMIC_RMW | ||||||
| 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC | 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC | ||||||
|  | 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK	if X86_64 | ||||||
| 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64 | 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64 | ||||||
| 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096 | 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096 | ||||||
| 	select ARCH_SUPPORTS_LTO_CLANG | 	select ARCH_SUPPORTS_LTO_CLANG | ||||||
|  |  | ||||||
|  | @ -454,3 +454,4 @@ | ||||||
| 447	i386	memfd_secret		sys_memfd_secret | 447	i386	memfd_secret		sys_memfd_secret | ||||||
| 448	i386	process_mrelease	sys_process_mrelease | 448	i386	process_mrelease	sys_process_mrelease | ||||||
| 449	i386	futex_waitv		sys_futex_waitv | 449	i386	futex_waitv		sys_futex_waitv | ||||||
|  | 450	i386	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -371,6 +371,7 @@ | ||||||
| 447	common	memfd_secret		sys_memfd_secret | 447	common	memfd_secret		sys_memfd_secret | ||||||
| 448	common	process_mrelease	sys_process_mrelease | 448	common	process_mrelease	sys_process_mrelease | ||||||
| 449	common	futex_waitv		sys_futex_waitv | 449	common	futex_waitv		sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node	sys_set_mempolicy_home_node | ||||||
| 
 | 
 | ||||||
| # | # | ||||||
| # Due to a historical design error, certain syscalls are numbered differently | # Due to a historical design error, certain syscalls are numbered differently | ||||||
|  |  | ||||||
|  | @ -27,6 +27,7 @@ | ||||||
| #include <asm/pkru.h> | #include <asm/pkru.h> | ||||||
| #include <asm/fpu/api.h> | #include <asm/fpu/api.h> | ||||||
| #include <asm-generic/pgtable_uffd.h> | #include <asm-generic/pgtable_uffd.h> | ||||||
|  | #include <linux/page_table_check.h> | ||||||
| 
 | 
 | ||||||
| extern pgd_t early_top_pgt[PTRS_PER_PGD]; | extern pgd_t early_top_pgt[PTRS_PER_PGD]; | ||||||
| bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); | bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); | ||||||
|  | @ -753,7 +754,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a) | ||||||
| 		return true; | 		return true; | ||||||
| 
 | 
 | ||||||
| 	if ((pte_flags(a) & _PAGE_PROTNONE) && | 	if ((pte_flags(a) & _PAGE_PROTNONE) && | ||||||
| 			mm_tlb_flush_pending(mm)) | 			atomic_read(&mm->tlb_flush_pending)) | ||||||
| 		return true; | 		return true; | ||||||
| 
 | 
 | ||||||
| 	return false; | 	return false; | ||||||
|  | @ -1007,18 +1008,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) | ||||||
| static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | ||||||
| 			      pte_t *ptep, pte_t pte) | 			      pte_t *ptep, pte_t pte) | ||||||
| { | { | ||||||
|  | 	page_table_check_pte_set(mm, addr, ptep, pte); | ||||||
| 	set_pte(ptep, pte); | 	set_pte(ptep, pte); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||||||
| 			      pmd_t *pmdp, pmd_t pmd) | 			      pmd_t *pmdp, pmd_t pmd) | ||||||
| { | { | ||||||
|  | 	page_table_check_pmd_set(mm, addr, pmdp, pmd); | ||||||
| 	set_pmd(pmdp, pmd); | 	set_pmd(pmdp, pmd); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, | static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, | ||||||
| 			      pud_t *pudp, pud_t pud) | 			      pud_t *pudp, pud_t pud) | ||||||
| { | { | ||||||
|  | 	page_table_check_pud_set(mm, addr, pudp, pud); | ||||||
| 	native_set_pud(pudp, pud); | 	native_set_pud(pudp, pud); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1049,6 +1053,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||||||
| 				       pte_t *ptep) | 				       pte_t *ptep) | ||||||
| { | { | ||||||
| 	pte_t pte = native_ptep_get_and_clear(ptep); | 	pte_t pte = native_ptep_get_and_clear(ptep); | ||||||
|  | 	page_table_check_pte_clear(mm, addr, pte); | ||||||
| 	return pte; | 	return pte; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1064,12 +1069,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, | ||||||
| 		 * care about updates and native needs no locking | 		 * care about updates and native needs no locking | ||||||
| 		 */ | 		 */ | ||||||
| 		pte = native_local_ptep_get_and_clear(ptep); | 		pte = native_local_ptep_get_and_clear(ptep); | ||||||
|  | 		page_table_check_pte_clear(mm, addr, pte); | ||||||
| 	} else { | 	} else { | ||||||
| 		pte = ptep_get_and_clear(mm, addr, ptep); | 		pte = ptep_get_and_clear(mm, addr, ptep); | ||||||
| 	} | 	} | ||||||
| 	return pte; | 	return pte; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #define __HAVE_ARCH_PTEP_CLEAR | ||||||
|  | static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, | ||||||
|  | 			      pte_t *ptep) | ||||||
|  | { | ||||||
|  | 	if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK)) | ||||||
|  | 		ptep_get_and_clear(mm, addr, ptep); | ||||||
|  | 	else | ||||||
|  | 		pte_clear(mm, addr, ptep); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| #define __HAVE_ARCH_PTEP_SET_WRPROTECT | #define __HAVE_ARCH_PTEP_SET_WRPROTECT | ||||||
| static inline void ptep_set_wrprotect(struct mm_struct *mm, | static inline void ptep_set_wrprotect(struct mm_struct *mm, | ||||||
| 				      unsigned long addr, pte_t *ptep) | 				      unsigned long addr, pte_t *ptep) | ||||||
|  | @ -1110,14 +1126,22 @@ static inline int pmd_write(pmd_t pmd) | ||||||
| static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, | static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||||||
| 				       pmd_t *pmdp) | 				       pmd_t *pmdp) | ||||||
| { | { | ||||||
| 	return native_pmdp_get_and_clear(pmdp); | 	pmd_t pmd = native_pmdp_get_and_clear(pmdp); | ||||||
|  | 
 | ||||||
|  | 	page_table_check_pmd_clear(mm, addr, pmd); | ||||||
|  | 
 | ||||||
|  | 	return pmd; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR | #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR | ||||||
| static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, | static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, | ||||||
| 					unsigned long addr, pud_t *pudp) | 					unsigned long addr, pud_t *pudp) | ||||||
| { | { | ||||||
| 	return native_pudp_get_and_clear(pudp); | 	pud_t pud = native_pudp_get_and_clear(pudp); | ||||||
|  | 
 | ||||||
|  | 	page_table_check_pud_clear(mm, addr, pud); | ||||||
|  | 
 | ||||||
|  | 	return pud; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define __HAVE_ARCH_PMDP_SET_WRPROTECT | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | ||||||
|  | @ -1138,6 +1162,7 @@ static inline int pud_write(pud_t pud) | ||||||
| static inline pmd_t pmdp_establish(struct vm_area_struct *vma, | static inline pmd_t pmdp_establish(struct vm_area_struct *vma, | ||||||
| 		unsigned long address, pmd_t *pmdp, pmd_t pmd) | 		unsigned long address, pmd_t *pmdp, pmd_t pmd) | ||||||
| { | { | ||||||
|  | 	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); | ||||||
| 	if (IS_ENABLED(CONFIG_SMP)) { | 	if (IS_ENABLED(CONFIG_SMP)) { | ||||||
| 		return xchg(pmdp, pmd); | 		return xchg(pmdp, pmd); | ||||||
| 	} else { | 	} else { | ||||||
|  |  | ||||||
|  | @ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void) | ||||||
| 
 | 
 | ||||||
| void *module_alloc(unsigned long size) | void *module_alloc(unsigned long size) | ||||||
| { | { | ||||||
|  | 	gfp_t gfp_mask = GFP_KERNEL; | ||||||
| 	void *p; | 	void *p; | ||||||
| 
 | 
 | ||||||
| 	if (PAGE_ALIGN(size) > MODULES_LEN) | 	if (PAGE_ALIGN(size) > MODULES_LEN) | ||||||
|  | @ -74,10 +75,10 @@ void *module_alloc(unsigned long size) | ||||||
| 
 | 
 | ||||||
| 	p = __vmalloc_node_range(size, MODULE_ALIGN, | 	p = __vmalloc_node_range(size, MODULE_ALIGN, | ||||||
| 				    MODULES_VADDR + get_module_load_offset(), | 				    MODULES_VADDR + get_module_load_offset(), | ||||||
| 				    MODULES_END, GFP_KERNEL, | 				    MODULES_END, gfp_mask, | ||||||
| 				    PAGE_KERNEL, 0, NUMA_NO_NODE, | 				    PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, | ||||||
| 				    __builtin_return_address(0)); | 				    __builtin_return_address(0)); | ||||||
| 	if (p && (kasan_module_alloc(p, size) < 0)) { | 	if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { | ||||||
| 		vfree(p); | 		vfree(p); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -1413,8 +1413,7 @@ void do_user_addr_fault(struct pt_regs *regs, | ||||||
| 	 * and if there is a fatal signal pending there is no guarantee | 	 * and if there is a fatal signal pending there is no guarantee | ||||||
| 	 * that we made any progress. Handle this case first. | 	 * that we made any progress. Handle this case first. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (unlikely((fault & VM_FAULT_RETRY) && | 	if (unlikely(fault & VM_FAULT_RETRY)) { | ||||||
| 		     (flags & FAULT_FLAG_ALLOW_RETRY))) { |  | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -420,3 +420,4 @@ | ||||||
| # 447 reserved for memfd_secret | # 447 reserved for memfd_secret | ||||||
| 448	common	process_mrelease		sys_process_mrelease | 448	common	process_mrelease		sys_process_mrelease | ||||||
| 449	common  futex_waitv                     sys_futex_waitv | 449	common  futex_waitv                     sys_futex_waitv | ||||||
|  | 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node | ||||||
|  |  | ||||||
|  | @ -127,7 +127,7 @@ void do_page_fault(struct pt_regs *regs) | ||||||
| 			goto do_sigbus; | 			goto do_sigbus; | ||||||
| 		BUG(); | 		BUG(); | ||||||
| 	} | 	} | ||||||
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) { | 
 | ||||||
| 	if (fault & VM_FAULT_RETRY) { | 	if (fault & VM_FAULT_RETRY) { | ||||||
| 		flags |= FAULT_FLAG_TRIED; | 		flags |= FAULT_FLAG_TRIED; | ||||||
| 
 | 
 | ||||||
|  | @ -138,7 +138,6 @@ void do_page_fault(struct pt_regs *regs) | ||||||
| 
 | 
 | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	mmap_read_unlock(mm); | 	mmap_read_unlock(mm); | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  | @ -1903,14 +1903,7 @@ static struct attribute *zram_disk_attrs[] = { | ||||||
| 	NULL, | 	NULL, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct attribute_group zram_disk_attr_group = { | ATTRIBUTE_GROUPS(zram_disk); | ||||||
| 	.attrs = zram_disk_attrs, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static const struct attribute_group *zram_disk_attr_groups[] = { |  | ||||||
| 	&zram_disk_attr_group, |  | ||||||
| 	NULL, |  | ||||||
| }; |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Allocate and initialize new zram device. the function returns |  * Allocate and initialize new zram device. the function returns | ||||||
|  | @ -1983,7 +1976,7 @@ static int zram_add(void) | ||||||
| 		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); | 		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); | ||||||
| 
 | 
 | ||||||
| 	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); | 	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); | ||||||
| 	ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups); | 	ret = device_add_disk(NULL, zram->disk, zram_disk_groups); | ||||||
| 	if (ret) | 	if (ret) | ||||||
| 		goto out_cleanup_disk; | 		goto out_cleanup_disk; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -127,11 +127,35 @@ ATTRIBUTE_GROUPS(dax_drv); | ||||||
| 
 | 
 | ||||||
| static int dax_bus_match(struct device *dev, struct device_driver *drv); | static int dax_bus_match(struct device *dev, struct device_driver *drv); | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Static dax regions are regions created by an external subsystem (such as | ||||||
|  |  * nvdimm) where a single range is assigned. Their boundaries are set by that | ||||||
|  |  * external subsystem and are usually limited to one physical memory range. | ||||||
|  |  * For example, for PMEM the region is usually defined by NVDIMM Namespace | ||||||
|  |  * boundaries (i.e. a single contiguous range). | ||||||
|  |  * | ||||||
|  |  * On dynamic dax regions, the assigned region can be partitioned by dax core | ||||||
|  |  * into multiple subdivisions. A subdivision is represented by one | ||||||
|  |  * /dev/daxN.M device composed of one or more potentially discontiguous ranges. | ||||||
|  |  * | ||||||
|  |  * When allocating a dax region, drivers must set whether it's static | ||||||
|  |  * (IORESOURCE_DAX_STATIC).  On static dax devices, the @pgmap is pre-assigned | ||||||
|  |  * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax | ||||||
|  |  * devices it is NULL but afterwards allocated by dax core on device ->probe(). | ||||||
|  |  * Care is needed to make sure that dynamic dax devices are torn down with a | ||||||
|  |  * cleared @pgmap field (see kill_dev_dax()). | ||||||
|  |  */ | ||||||
| static bool is_static(struct dax_region *dax_region) | static bool is_static(struct dax_region *dax_region) | ||||||
| { | { | ||||||
| 	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; | 	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | bool static_dev_dax(struct dev_dax *dev_dax) | ||||||
|  | { | ||||||
|  | 	return is_static(dev_dax->region); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(static_dev_dax); | ||||||
|  | 
 | ||||||
| static u64 dev_dax_size(struct dev_dax *dev_dax) | static u64 dev_dax_size(struct dev_dax *dev_dax) | ||||||
| { | { | ||||||
| 	u64 size = 0; | 	u64 size = 0; | ||||||
|  | @ -361,6 +385,14 @@ void kill_dev_dax(struct dev_dax *dev_dax) | ||||||
| 
 | 
 | ||||||
| 	kill_dax(dax_dev); | 	kill_dax(dax_dev); | ||||||
| 	unmap_mapping_range(inode->i_mapping, 0, 0, 1); | 	unmap_mapping_range(inode->i_mapping, 0, 0, 1); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Dynamic dax regions have the pgmap allocated via devm_kzalloc() | ||||||
|  | 	 * and thus freed by devm. Clear the pgmap to not have stale pgmap | ||||||
|  | 	 * ranges on probe() from previous reconfigurations of region devices. | ||||||
|  | 	 */ | ||||||
|  | 	if (!static_dev_dax(dev_dax)) | ||||||
|  | 		dev_dax->pgmap = NULL; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(kill_dev_dax); | EXPORT_SYMBOL_GPL(kill_dev_dax); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -39,6 +39,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv, | ||||||
| 	__dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) | 	__dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) | ||||||
| void dax_driver_unregister(struct dax_device_driver *dax_drv); | void dax_driver_unregister(struct dax_device_driver *dax_drv); | ||||||
| void kill_dev_dax(struct dev_dax *dev_dax); | void kill_dev_dax(struct dev_dax *dev_dax); | ||||||
|  | bool static_dev_dax(struct dev_dax *dev_dax); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * While run_dax() is potentially a generic operation that could be |  * While run_dax() is potentially a generic operation that could be | ||||||
|  |  | ||||||
|  | @ -73,11 +73,39 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, | ||||||
| 	return -1; | 	return -1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn, | ||||||
|  | 			      unsigned long fault_size) | ||||||
|  | { | ||||||
|  | 	unsigned long i, nr_pages = fault_size / PAGE_SIZE; | ||||||
|  | 	struct file *filp = vmf->vma->vm_file; | ||||||
|  | 	struct dev_dax *dev_dax = filp->private_data; | ||||||
|  | 	pgoff_t pgoff; | ||||||
|  | 
 | ||||||
|  | 	/* mapping is only set on the head */ | ||||||
|  | 	if (dev_dax->pgmap->vmemmap_shift) | ||||||
|  | 		nr_pages = 1; | ||||||
|  | 
 | ||||||
|  | 	pgoff = linear_page_index(vmf->vma, | ||||||
|  | 			ALIGN(vmf->address, fault_size)); | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < nr_pages; i++) { | ||||||
|  | 		struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i); | ||||||
|  | 
 | ||||||
|  | 		page = compound_head(page); | ||||||
|  | 		if (page->mapping) | ||||||
|  | 			continue; | ||||||
|  | 
 | ||||||
|  | 		page->mapping = filp->f_mapping; | ||||||
|  | 		page->index = pgoff + i; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, | static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, | ||||||
| 				struct vm_fault *vmf, pfn_t *pfn) | 				struct vm_fault *vmf) | ||||||
| { | { | ||||||
| 	struct device *dev = &dev_dax->dev; | 	struct device *dev = &dev_dax->dev; | ||||||
| 	phys_addr_t phys; | 	phys_addr_t phys; | ||||||
|  | 	pfn_t pfn; | ||||||
| 	unsigned int fault_size = PAGE_SIZE; | 	unsigned int fault_size = PAGE_SIZE; | ||||||
| 
 | 
 | ||||||
| 	if (check_vma(dev_dax, vmf->vma, __func__)) | 	if (check_vma(dev_dax, vmf->vma, __func__)) | ||||||
|  | @ -98,18 +126,21 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, | ||||||
| 		return VM_FAULT_SIGBUS; | 		return VM_FAULT_SIGBUS; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); | 	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); | ||||||
| 
 | 
 | ||||||
| 	return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); | 	dax_set_mapping(vmf, pfn, fault_size); | ||||||
|  | 
 | ||||||
|  | 	return vmf_insert_mixed(vmf->vma, vmf->address, pfn); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, | static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, | ||||||
| 				struct vm_fault *vmf, pfn_t *pfn) | 				struct vm_fault *vmf) | ||||||
| { | { | ||||||
| 	unsigned long pmd_addr = vmf->address & PMD_MASK; | 	unsigned long pmd_addr = vmf->address & PMD_MASK; | ||||||
| 	struct device *dev = &dev_dax->dev; | 	struct device *dev = &dev_dax->dev; | ||||||
| 	phys_addr_t phys; | 	phys_addr_t phys; | ||||||
| 	pgoff_t pgoff; | 	pgoff_t pgoff; | ||||||
|  | 	pfn_t pfn; | ||||||
| 	unsigned int fault_size = PMD_SIZE; | 	unsigned int fault_size = PMD_SIZE; | ||||||
| 
 | 
 | ||||||
| 	if (check_vma(dev_dax, vmf->vma, __func__)) | 	if (check_vma(dev_dax, vmf->vma, __func__)) | ||||||
|  | @ -138,19 +169,22 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, | ||||||
| 		return VM_FAULT_SIGBUS; | 		return VM_FAULT_SIGBUS; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); | 	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); | ||||||
| 
 | 
 | ||||||
| 	return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); | 	dax_set_mapping(vmf, pfn, fault_size); | ||||||
|  | 
 | ||||||
|  | 	return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | ||||||
| static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, | static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, | ||||||
| 				struct vm_fault *vmf, pfn_t *pfn) | 				struct vm_fault *vmf) | ||||||
| { | { | ||||||
| 	unsigned long pud_addr = vmf->address & PUD_MASK; | 	unsigned long pud_addr = vmf->address & PUD_MASK; | ||||||
| 	struct device *dev = &dev_dax->dev; | 	struct device *dev = &dev_dax->dev; | ||||||
| 	phys_addr_t phys; | 	phys_addr_t phys; | ||||||
| 	pgoff_t pgoff; | 	pgoff_t pgoff; | ||||||
|  | 	pfn_t pfn; | ||||||
| 	unsigned int fault_size = PUD_SIZE; | 	unsigned int fault_size = PUD_SIZE; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -180,13 +214,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, | ||||||
| 		return VM_FAULT_SIGBUS; | 		return VM_FAULT_SIGBUS; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); | 	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); | ||||||
| 
 | 
 | ||||||
| 	return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); | 	dax_set_mapping(vmf, pfn, fault_size); | ||||||
|  | 
 | ||||||
|  | 	return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); | ||||||
| } | } | ||||||
| #else | #else | ||||||
| static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, | static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, | ||||||
| 				struct vm_fault *vmf, pfn_t *pfn) | 				struct vm_fault *vmf) | ||||||
| { | { | ||||||
| 	return VM_FAULT_FALLBACK; | 	return VM_FAULT_FALLBACK; | ||||||
| } | } | ||||||
|  | @ -196,10 +232,8 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, | ||||||
| 		enum page_entry_size pe_size) | 		enum page_entry_size pe_size) | ||||||
| { | { | ||||||
| 	struct file *filp = vmf->vma->vm_file; | 	struct file *filp = vmf->vma->vm_file; | ||||||
| 	unsigned long fault_size; |  | ||||||
| 	vm_fault_t rc = VM_FAULT_SIGBUS; | 	vm_fault_t rc = VM_FAULT_SIGBUS; | ||||||
| 	int id; | 	int id; | ||||||
| 	pfn_t pfn; |  | ||||||
| 	struct dev_dax *dev_dax = filp->private_data; | 	struct dev_dax *dev_dax = filp->private_data; | ||||||
| 
 | 
 | ||||||
| 	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, | 	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, | ||||||
|  | @ -209,43 +243,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, | ||||||
| 	id = dax_read_lock(); | 	id = dax_read_lock(); | ||||||
| 	switch (pe_size) { | 	switch (pe_size) { | ||||||
| 	case PE_SIZE_PTE: | 	case PE_SIZE_PTE: | ||||||
| 		fault_size = PAGE_SIZE; | 		rc = __dev_dax_pte_fault(dev_dax, vmf); | ||||||
| 		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn); |  | ||||||
| 		break; | 		break; | ||||||
| 	case PE_SIZE_PMD: | 	case PE_SIZE_PMD: | ||||||
| 		fault_size = PMD_SIZE; | 		rc = __dev_dax_pmd_fault(dev_dax, vmf); | ||||||
| 		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn); |  | ||||||
| 		break; | 		break; | ||||||
| 	case PE_SIZE_PUD: | 	case PE_SIZE_PUD: | ||||||
| 		fault_size = PUD_SIZE; | 		rc = __dev_dax_pud_fault(dev_dax, vmf); | ||||||
| 		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn); |  | ||||||
| 		break; | 		break; | ||||||
| 	default: | 	default: | ||||||
| 		rc = VM_FAULT_SIGBUS; | 		rc = VM_FAULT_SIGBUS; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (rc == VM_FAULT_NOPAGE) { |  | ||||||
| 		unsigned long i; |  | ||||||
| 		pgoff_t pgoff; |  | ||||||
| 
 |  | ||||||
| 		/*
 |  | ||||||
| 		 * In the device-dax case the only possibility for a |  | ||||||
| 		 * VM_FAULT_NOPAGE result is when device-dax capacity is |  | ||||||
| 		 * mapped. No need to consider the zero page, or racing |  | ||||||
| 		 * conflicting mappings. |  | ||||||
| 		 */ |  | ||||||
| 		pgoff = linear_page_index(vmf->vma, vmf->address |  | ||||||
| 				& ~(fault_size - 1)); |  | ||||||
| 		for (i = 0; i < fault_size / PAGE_SIZE; i++) { |  | ||||||
| 			struct page *page; |  | ||||||
| 
 |  | ||||||
| 			page = pfn_to_page(pfn_t_to_pfn(pfn) + i); |  | ||||||
| 			if (page->mapping) |  | ||||||
| 				continue; |  | ||||||
| 			page->mapping = filp->f_mapping; |  | ||||||
| 			page->index = pgoff + i; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	dax_read_unlock(id); | 	dax_read_unlock(id); | ||||||
| 
 | 
 | ||||||
| 	return rc; | 	return rc; | ||||||
|  | @ -398,17 +407,34 @@ int dev_dax_probe(struct dev_dax *dev_dax) | ||||||
| 	void *addr; | 	void *addr; | ||||||
| 	int rc, i; | 	int rc, i; | ||||||
| 
 | 
 | ||||||
| 	pgmap = dev_dax->pgmap; | 	if (static_dev_dax(dev_dax)) { | ||||||
| 	if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1, | 		if (dev_dax->nr_range > 1) { | ||||||
| 			"static pgmap / multi-range device conflict\n")) | 			dev_warn(dev, | ||||||
|  | 				"static pgmap / multi-range device conflict\n"); | ||||||
| 			return -EINVAL; | 			return -EINVAL; | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 	if (!pgmap) { | 		pgmap = dev_dax->pgmap; | ||||||
| 		pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range) | 	} else { | ||||||
| 				* (dev_dax->nr_range - 1), GFP_KERNEL); | 		if (dev_dax->pgmap) { | ||||||
|  | 			dev_warn(dev, | ||||||
|  | 				 "dynamic-dax with pre-populated page map\n"); | ||||||
|  | 			return -EINVAL; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		pgmap = devm_kzalloc(dev, | ||||||
|  | 				     struct_size(pgmap, ranges, dev_dax->nr_range - 1), | ||||||
|  | 				     GFP_KERNEL); | ||||||
| 		if (!pgmap) | 		if (!pgmap) | ||||||
| 			return -ENOMEM; | 			return -ENOMEM; | ||||||
|  | 
 | ||||||
| 		pgmap->nr_range = dev_dax->nr_range; | 		pgmap->nr_range = dev_dax->nr_range; | ||||||
|  | 		dev_dax->pgmap = pgmap; | ||||||
|  | 
 | ||||||
|  | 		for (i = 0; i < dev_dax->nr_range; i++) { | ||||||
|  | 			struct range *range = &dev_dax->ranges[i].range; | ||||||
|  | 			pgmap->ranges[i] = *range; | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < dev_dax->nr_range; i++) { | 	for (i = 0; i < dev_dax->nr_range; i++) { | ||||||
|  | @ -420,12 +446,12 @@ int dev_dax_probe(struct dev_dax *dev_dax) | ||||||
| 					i, range->start, range->end); | 					i, range->start, range->end); | ||||||
| 			return -EBUSY; | 			return -EBUSY; | ||||||
| 		} | 		} | ||||||
| 		/* don't update the range for static pgmap */ |  | ||||||
| 		if (!dev_dax->pgmap) |  | ||||||
| 			pgmap->ranges[i] = *range; |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	pgmap->type = MEMORY_DEVICE_GENERIC; | 	pgmap->type = MEMORY_DEVICE_GENERIC; | ||||||
|  | 	if (dev_dax->align > PAGE_SIZE) | ||||||
|  | 		pgmap->vmemmap_shift = | ||||||
|  | 			order_base_2(dev_dax->align >> PAGE_SHIFT); | ||||||
| 	addr = devm_memremap_pages(dev, pgmap); | 	addr = devm_memremap_pages(dev, pgmap); | ||||||
| 	if (IS_ERR(addr)) | 	if (IS_ERR(addr)) | ||||||
| 		return PTR_ERR(addr); | 		return PTR_ERR(addr); | ||||||
|  |  | ||||||
|  | @ -98,15 +98,14 @@ static int siw_create_tx_threads(void) | ||||||
| 			continue; | 			continue; | ||||||
| 
 | 
 | ||||||
| 		siw_tx_thread[cpu] = | 		siw_tx_thread[cpu] = | ||||||
| 			kthread_create(siw_run_sq, (unsigned long *)(long)cpu, | 			kthread_run_on_cpu(siw_run_sq, | ||||||
| 				       "siw_tx/%d", cpu); | 					   (unsigned long *)(long)cpu, | ||||||
|  | 					   cpu, "siw_tx/%u"); | ||||||
| 		if (IS_ERR(siw_tx_thread[cpu])) { | 		if (IS_ERR(siw_tx_thread[cpu])) { | ||||||
| 			siw_tx_thread[cpu] = NULL; | 			siw_tx_thread[cpu] = NULL; | ||||||
| 			continue; | 			continue; | ||||||
| 		} | 		} | ||||||
| 		kthread_bind(siw_tx_thread[cpu], cpu); |  | ||||||
| 
 | 
 | ||||||
| 		wake_up_process(siw_tx_thread[cpu]); |  | ||||||
| 		assigned++; | 		assigned++; | ||||||
| 	} | 	} | ||||||
| 	return assigned; | 	return assigned; | ||||||
|  |  | ||||||
|  | @ -26,6 +26,7 @@ | ||||||
| #include <linux/serial_core.h> | #include <linux/serial_core.h> | ||||||
| #include <linux/sysfs.h> | #include <linux/sysfs.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
|  | #include <linux/kmemleak.h> | ||||||
| 
 | 
 | ||||||
| #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */ | #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */ | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
|  | @ -524,9 +525,12 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, | ||||||
| 		size = dt_mem_next_cell(dt_root_size_cells, &prop); | 		size = dt_mem_next_cell(dt_root_size_cells, &prop); | ||||||
| 
 | 
 | ||||||
| 		if (size && | 		if (size && | ||||||
| 		    early_init_dt_reserve_memory_arch(base, size, nomap) == 0) | 		    early_init_dt_reserve_memory_arch(base, size, nomap) == 0) { | ||||||
| 			pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", | 			pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", | ||||||
| 				uname, &base, (unsigned long)(size / SZ_1M)); | 				uname, &base, (unsigned long)(size / SZ_1M)); | ||||||
|  | 			if (!nomap) | ||||||
|  | 				kmemleak_alloc_phys(base, size, 0, 0); | ||||||
|  | 		} | ||||||
| 		else | 		else | ||||||
| 			pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", | 			pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", | ||||||
| 				uname, &base, (unsigned long)(size / SZ_1M)); | 				uname, &base, (unsigned long)(size / SZ_1M)); | ||||||
|  |  | ||||||
|  | @ -27,8 +27,8 @@ | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/uaccess.h> | #include <linux/uaccess.h> | ||||||
| #include <linux/fiemap.h> | #include <linux/fiemap.h> | ||||||
| #include <linux/backing-dev.h> |  | ||||||
| #include <linux/iomap.h> | #include <linux/iomap.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| #include "ext4_jbd2.h" | #include "ext4_jbd2.h" | ||||||
| #include "ext4_extents.h" | #include "ext4_extents.h" | ||||||
| #include "xattr.h" | #include "xattr.h" | ||||||
|  | @ -4404,8 +4404,7 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode) | ||||||
| 	err = ext4_es_remove_extent(inode, last_block, | 	err = ext4_es_remove_extent(inode, last_block, | ||||||
| 				    EXT_MAX_BLOCKS - last_block); | 				    EXT_MAX_BLOCKS - last_block); | ||||||
| 	if (err == -ENOMEM) { | 	if (err == -ENOMEM) { | ||||||
| 		cond_resched(); | 		memalloc_retry_wait(GFP_ATOMIC); | ||||||
| 		congestion_wait(BLK_RW_ASYNC, HZ/50); |  | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 	if (err) | 	if (err) | ||||||
|  | @ -4413,8 +4412,7 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode) | ||||||
| retry_remove_space: | retry_remove_space: | ||||||
| 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); | 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); | ||||||
| 	if (err == -ENOMEM) { | 	if (err == -ENOMEM) { | ||||||
| 		cond_resched(); | 		memalloc_retry_wait(GFP_ATOMIC); | ||||||
| 		congestion_wait(BLK_RW_ASYNC, HZ/50); |  | ||||||
| 		goto retry_remove_space; | 		goto retry_remove_space; | ||||||
| 	} | 	} | ||||||
| 	return err; | 	return err; | ||||||
|  |  | ||||||
|  | @ -7,7 +7,7 @@ | ||||||
| #include <linux/iomap.h> | #include <linux/iomap.h> | ||||||
| #include <linux/fiemap.h> | #include <linux/fiemap.h> | ||||||
| #include <linux/iversion.h> | #include <linux/iversion.h> | ||||||
| #include <linux/backing-dev.h> | #include <linux/sched/mm.h> | ||||||
| 
 | 
 | ||||||
| #include "ext4_jbd2.h" | #include "ext4_jbd2.h" | ||||||
| #include "ext4.h" | #include "ext4.h" | ||||||
|  | @ -1929,8 +1929,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) | ||||||
| retry: | retry: | ||||||
| 			err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 			err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | ||||||
| 			if (err == -ENOMEM) { | 			if (err == -ENOMEM) { | ||||||
| 				cond_resched(); | 				memalloc_retry_wait(GFP_ATOMIC); | ||||||
| 				congestion_wait(BLK_RW_ASYNC, HZ/50); |  | ||||||
| 				goto retry; | 				goto retry; | ||||||
| 			} | 			} | ||||||
| 			if (err) | 			if (err) | ||||||
|  |  | ||||||
|  | @ -24,7 +24,7 @@ | ||||||
| #include <linux/kernel.h> | #include <linux/kernel.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/mm.h> | #include <linux/mm.h> | ||||||
| #include <linux/backing-dev.h> | #include <linux/sched/mm.h> | ||||||
| 
 | 
 | ||||||
| #include "ext4_jbd2.h" | #include "ext4_jbd2.h" | ||||||
| #include "xattr.h" | #include "xattr.h" | ||||||
|  | @ -523,12 +523,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | ||||||
| 			ret = PTR_ERR(bounce_page); | 			ret = PTR_ERR(bounce_page); | ||||||
| 			if (ret == -ENOMEM && | 			if (ret == -ENOMEM && | ||||||
| 			    (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { | 			    (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { | ||||||
| 				gfp_flags = GFP_NOFS; | 				gfp_t new_gfp_flags = GFP_NOFS; | ||||||
| 				if (io->io_bio) | 				if (io->io_bio) | ||||||
| 					ext4_io_submit(io); | 					ext4_io_submit(io); | ||||||
| 				else | 				else | ||||||
| 					gfp_flags |= __GFP_NOFAIL; | 					new_gfp_flags |= __GFP_NOFAIL; | ||||||
| 				congestion_wait(BLK_RW_ASYNC, HZ/50); | 				memalloc_retry_wait(gfp_flags); | ||||||
|  | 				gfp_flags = new_gfp_flags; | ||||||
| 				goto retry_encrypt; | 				goto retry_encrypt; | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -8,9 +8,9 @@ | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| #include <linux/f2fs_fs.h> | #include <linux/f2fs_fs.h> | ||||||
| #include <linux/buffer_head.h> | #include <linux/buffer_head.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| #include <linux/mpage.h> | #include <linux/mpage.h> | ||||||
| #include <linux/writeback.h> | #include <linux/writeback.h> | ||||||
| #include <linux/backing-dev.h> |  | ||||||
| #include <linux/pagevec.h> | #include <linux/pagevec.h> | ||||||
| #include <linux/blkdev.h> | #include <linux/blkdev.h> | ||||||
| #include <linux/bio.h> | #include <linux/bio.h> | ||||||
|  | @ -2542,7 +2542,7 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) | ||||||
| 		/* flush pending IOs and wait for a while in the ENOMEM case */ | 		/* flush pending IOs and wait for a while in the ENOMEM case */ | ||||||
| 		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { | 		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { | ||||||
| 			f2fs_flush_merged_writes(fio->sbi); | 			f2fs_flush_merged_writes(fio->sbi); | ||||||
| 			congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); | 			memalloc_retry_wait(GFP_NOFS); | ||||||
| 			gfp_flags |= __GFP_NOFAIL; | 			gfp_flags |= __GFP_NOFAIL; | ||||||
| 			goto retry_encrypt; | 			goto retry_encrypt; | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | @ -7,7 +7,6 @@ | ||||||
|  */ |  */ | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/backing-dev.h> |  | ||||||
| #include <linux/init.h> | #include <linux/init.h> | ||||||
| #include <linux/f2fs_fs.h> | #include <linux/f2fs_fs.h> | ||||||
| #include <linux/kthread.h> | #include <linux/kthread.h> | ||||||
|  | @ -15,6 +14,7 @@ | ||||||
| #include <linux/freezer.h> | #include <linux/freezer.h> | ||||||
| #include <linux/sched/signal.h> | #include <linux/sched/signal.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| 
 | 
 | ||||||
| #include "f2fs.h" | #include "f2fs.h" | ||||||
| #include "node.h" | #include "node.h" | ||||||
|  | @ -1375,8 +1375,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, | ||||||
| 		if (err) { | 		if (err) { | ||||||
| 			clear_page_private_gcing(page); | 			clear_page_private_gcing(page); | ||||||
| 			if (err == -ENOMEM) { | 			if (err == -ENOMEM) { | ||||||
| 				congestion_wait(BLK_RW_ASYNC, | 				memalloc_retry_wait(GFP_NOFS); | ||||||
| 						DEFAULT_IO_TIMEOUT); |  | ||||||
| 				goto retry; | 				goto retry; | ||||||
| 			} | 			} | ||||||
| 			if (is_dirty) | 			if (is_dirty) | ||||||
|  |  | ||||||
|  | @ -8,8 +8,8 @@ | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| #include <linux/f2fs_fs.h> | #include <linux/f2fs_fs.h> | ||||||
| #include <linux/buffer_head.h> | #include <linux/buffer_head.h> | ||||||
| #include <linux/backing-dev.h> |  | ||||||
| #include <linux/writeback.h> | #include <linux/writeback.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| 
 | 
 | ||||||
| #include "f2fs.h" | #include "f2fs.h" | ||||||
| #include "node.h" | #include "node.h" | ||||||
|  | @ -562,7 +562,7 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino) | ||||||
| 	inode = f2fs_iget(sb, ino); | 	inode = f2fs_iget(sb, ino); | ||||||
| 	if (IS_ERR(inode)) { | 	if (IS_ERR(inode)) { | ||||||
| 		if (PTR_ERR(inode) == -ENOMEM) { | 		if (PTR_ERR(inode) == -ENOMEM) { | ||||||
| 			congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); | 			memalloc_retry_wait(GFP_NOFS); | ||||||
| 			goto retry; | 			goto retry; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -8,7 +8,7 @@ | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| #include <linux/f2fs_fs.h> | #include <linux/f2fs_fs.h> | ||||||
| #include <linux/mpage.h> | #include <linux/mpage.h> | ||||||
| #include <linux/backing-dev.h> | #include <linux/sched/mm.h> | ||||||
| #include <linux/blkdev.h> | #include <linux/blkdev.h> | ||||||
| #include <linux/pagevec.h> | #include <linux/pagevec.h> | ||||||
| #include <linux/swap.h> | #include <linux/swap.h> | ||||||
|  | @ -2750,7 +2750,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | ||||||
| retry: | retry: | ||||||
| 	ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); | 	ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); | ||||||
| 	if (!ipage) { | 	if (!ipage) { | ||||||
| 		congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); | 		memalloc_retry_wait(GFP_NOFS); | ||||||
| 		goto retry; | 		goto retry; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -8,6 +8,7 @@ | ||||||
| #include <asm/unaligned.h> | #include <asm/unaligned.h> | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| #include <linux/f2fs_fs.h> | #include <linux/f2fs_fs.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| #include "f2fs.h" | #include "f2fs.h" | ||||||
| #include "node.h" | #include "node.h" | ||||||
| #include "segment.h" | #include "segment.h" | ||||||
|  | @ -587,7 +588,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | ||||||
| 	err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); | 	err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); | ||||||
| 	if (err) { | 	if (err) { | ||||||
| 		if (err == -ENOMEM) { | 		if (err == -ENOMEM) { | ||||||
| 			congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); | 			memalloc_retry_wait(GFP_NOFS); | ||||||
| 			goto retry_dn; | 			goto retry_dn; | ||||||
| 		} | 		} | ||||||
| 		goto out; | 		goto out; | ||||||
|  | @ -670,8 +671,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | ||||||
| 			err = check_index_in_prev_nodes(sbi, dest, &dn); | 			err = check_index_in_prev_nodes(sbi, dest, &dn); | ||||||
| 			if (err) { | 			if (err) { | ||||||
| 				if (err == -ENOMEM) { | 				if (err == -ENOMEM) { | ||||||
| 					congestion_wait(BLK_RW_ASYNC, | 					memalloc_retry_wait(GFP_NOFS); | ||||||
| 							DEFAULT_IO_TIMEOUT); |  | ||||||
| 					goto retry_prev; | 					goto retry_prev; | ||||||
| 				} | 				} | ||||||
| 				goto err; | 				goto err; | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| #include <linux/f2fs_fs.h> | #include <linux/f2fs_fs.h> | ||||||
| #include <linux/bio.h> | #include <linux/bio.h> | ||||||
| #include <linux/blkdev.h> | #include <linux/blkdev.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| #include <linux/prefetch.h> | #include <linux/prefetch.h> | ||||||
| #include <linux/kthread.h> | #include <linux/kthread.h> | ||||||
| #include <linux/swap.h> | #include <linux/swap.h> | ||||||
|  | @ -245,9 +246,7 @@ static int __revoke_inmem_pages(struct inode *inode, | ||||||
| 								LOOKUP_NODE); | 								LOOKUP_NODE); | ||||||
| 			if (err) { | 			if (err) { | ||||||
| 				if (err == -ENOMEM) { | 				if (err == -ENOMEM) { | ||||||
| 					congestion_wait(BLK_RW_ASYNC, | 					memalloc_retry_wait(GFP_NOFS); | ||||||
| 							DEFAULT_IO_TIMEOUT); |  | ||||||
| 					cond_resched(); |  | ||||||
| 					goto retry; | 					goto retry; | ||||||
| 				} | 				} | ||||||
| 				err = -EAGAIN; | 				err = -EAGAIN; | ||||||
|  | @ -424,9 +423,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) | ||||||
| 			err = f2fs_do_write_data_page(&fio); | 			err = f2fs_do_write_data_page(&fio); | ||||||
| 			if (err) { | 			if (err) { | ||||||
| 				if (err == -ENOMEM) { | 				if (err == -ENOMEM) { | ||||||
| 					congestion_wait(BLK_RW_ASYNC, | 					memalloc_retry_wait(GFP_NOFS); | ||||||
| 							DEFAULT_IO_TIMEOUT); |  | ||||||
| 					cond_resched(); |  | ||||||
| 					goto retry; | 					goto retry; | ||||||
| 				} | 				} | ||||||
| 				unlock_page(page); | 				unlock_page(page); | ||||||
|  |  | ||||||
|  | @ -8,9 +8,9 @@ | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/init.h> | #include <linux/init.h> | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
|  | #include <linux/sched/mm.h> | ||||||
| #include <linux/statfs.h> | #include <linux/statfs.h> | ||||||
| #include <linux/buffer_head.h> | #include <linux/buffer_head.h> | ||||||
| #include <linux/backing-dev.h> |  | ||||||
| #include <linux/kthread.h> | #include <linux/kthread.h> | ||||||
| #include <linux/parser.h> | #include <linux/parser.h> | ||||||
| #include <linux/mount.h> | #include <linux/mount.h> | ||||||
|  | @ -2415,8 +2415,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, | ||||||
| 		page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); | 		page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); | ||||||
| 		if (IS_ERR(page)) { | 		if (IS_ERR(page)) { | ||||||
| 			if (PTR_ERR(page) == -ENOMEM) { | 			if (PTR_ERR(page) == -ENOMEM) { | ||||||
| 				congestion_wait(BLK_RW_ASYNC, | 				memalloc_retry_wait(GFP_NOFS); | ||||||
| 						DEFAULT_IO_TIMEOUT); |  | ||||||
| 				goto repeat; | 				goto repeat; | ||||||
| 			} | 			} | ||||||
| 			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); | 			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); | ||||||
|  |  | ||||||
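The f2fs hunks above (and the XFS ones later in this diff) all make the same conversion: an open-coded congestion_wait(BLK_RW_ASYNC, ...) back-off on -ENOMEM becomes memalloc_retry_wait(), which takes the gfp mask of the failed allocation. A minimal sketch of the resulting retry idiom, assuming an in-kernel caller; struct foo is a hypothetical payload and not part of this series:

#include <linux/slab.h>		/* kmalloc() */
#include <linux/sched/mm.h>	/* memalloc_retry_wait() */

struct foo { int x; };			/* hypothetical payload, illustration only */

/* Retry a small allocation until it succeeds, backing off between attempts. */
static struct foo *alloc_foo_retry(gfp_t gfp)
{
	struct foo *obj;

retry:
	obj = kmalloc(sizeof(*obj), gfp);
	if (!obj) {
		/* Same back-off the hunks above switch to. */
		memalloc_retry_wait(gfp);
		goto retry;
	}
	return obj;
}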
|  | @ -409,10 +409,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) | ||||||
| 	struct vm_area_struct *vma; | 	struct vm_area_struct *vma; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * end == 0 indicates that the entire range after | 	 * end == 0 indicates that the entire range after start should be | ||||||
| 	 * start should be unmapped. | 	 * unmapped.  Note, end is exclusive, whereas the interval tree takes | ||||||
|  | 	 * an inclusive "last". | ||||||
| 	 */ | 	 */ | ||||||
| 	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) { | 	vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) { | ||||||
| 		unsigned long v_offset; | 		unsigned long v_offset; | ||||||
| 		unsigned long v_end; | 		unsigned long v_end; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
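The rewritten comment and loop above hinge on one convention: hugetlb_vmdelete_list() is called with an exclusive end (0 meaning "everything after start"), while vma_interval_tree_foreach() expects an inclusive last index. A hedged one-liner capturing that conversion, for illustration only:

#include <linux/kernel.h>	/* ULONG_MAX */
#include <linux/types.h>	/* pgoff_t */

/* Map an exclusive 'end' (0 == whole range after start) to an inclusive 'last'. */
static inline pgoff_t exclusive_end_to_last(pgoff_t end)
{
	return end ? end - 1 : ULONG_MAX;
}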
							
								
								
									
fs/inode.c (49 insertions)
							|  | @ -526,6 +526,55 @@ void __remove_inode_hash(struct inode *inode) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(__remove_inode_hash); | EXPORT_SYMBOL(__remove_inode_hash); | ||||||
| 
 | 
 | ||||||
|  | void dump_mapping(const struct address_space *mapping) | ||||||
|  | { | ||||||
|  | 	struct inode *host; | ||||||
|  | 	const struct address_space_operations *a_ops; | ||||||
|  | 	struct hlist_node *dentry_first; | ||||||
|  | 	struct dentry *dentry_ptr; | ||||||
|  | 	struct dentry dentry; | ||||||
|  | 	unsigned long ino; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * If mapping is an invalid pointer, we don't want to crash | ||||||
|  | 	 * accessing it, so probe everything depending on it carefully. | ||||||
|  | 	 */ | ||||||
|  | 	if (get_kernel_nofault(host, &mapping->host) || | ||||||
|  | 	    get_kernel_nofault(a_ops, &mapping->a_ops)) { | ||||||
|  | 		pr_warn("invalid mapping:%px\n", mapping); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (!host) { | ||||||
|  | 		pr_warn("aops:%ps\n", a_ops); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) || | ||||||
|  | 	    get_kernel_nofault(ino, &host->i_ino)) { | ||||||
|  | 		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (!dentry_first) { | ||||||
|  | 		pr_warn("aops:%ps ino:%lx\n", a_ops, ino); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); | ||||||
|  | 	if (get_kernel_nofault(dentry, dentry_ptr)) { | ||||||
|  | 		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", | ||||||
|  | 				a_ops, ino, dentry_ptr); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * if dentry is corrupted, the %pd handler may still crash, | ||||||
|  | 	 * but it's unlikely that we reach here with a corrupt mapping | ||||||
|  | 	 */ | ||||||
|  | 	pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void clear_inode(struct inode *inode) | void clear_inode(struct inode *inode) | ||||||
| { | { | ||||||
| 	/*
 | 	/*
 | ||||||
|  |  | ||||||
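dump_mapping() above deliberately probes every pointer with get_kernel_nofault() before dereferencing it, so it can be called from error paths that only hold a possibly-stale address_space pointer. A hedged usage sketch; report_stuck_page() is a hypothetical caller, not part of this series:

#include <linux/fs.h>		/* dump_mapping(), declared in the fs.h hunk below */
#include <linux/mm_types.h>
#include <linux/printk.h>

/* Hypothetical diagnostic helper for a page whose writeback never completes. */
static void report_stuck_page(struct page *page)
{
	pr_warn("writeback appears stuck for page %px\n", page);
	dump_mapping(page->mapping);	/* tolerates an invalid mapping pointer */
}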
|  | @ -430,7 +430,7 @@ static int ioctl_file_dedupe_range(struct file *file, | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	size = offsetof(struct file_dedupe_range __user, info[count]); | 	size = offsetof(struct file_dedupe_range, info[count]); | ||||||
| 	if (size > PAGE_SIZE) { | 	if (size > PAGE_SIZE) { | ||||||
| 		ret = -ENOMEM; | 		ret = -ENOMEM; | ||||||
| 		goto out; | 		goto out; | ||||||
|  |  | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
| /**
 | /*
 | ||||||
|  * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project. |  * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project. | ||||||
|  * |  * | ||||||
|  * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. |  * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. | ||||||
|  |  | ||||||
|  | @ -2040,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle, | ||||||
| 	int i, idx; | 	int i, idx; | ||||||
| 	struct ocfs2_extent_list *el, *left_el, *right_el; | 	struct ocfs2_extent_list *el, *left_el, *right_el; | ||||||
| 	struct ocfs2_extent_rec *left_rec, *right_rec; | 	struct ocfs2_extent_rec *left_rec, *right_rec; | ||||||
| 	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; | 	struct buffer_head *root_bh; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Update the counts and position values within all the | 	 * Update the counts and position values within all the | ||||||
|  |  | ||||||
|  | @ -1799,23 +1799,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | ||||||
| 	 */ | 	 */ | ||||||
| 	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, | 	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, | ||||||
| 					 cluster_of_pages, mmap_page); | 					 cluster_of_pages, mmap_page); | ||||||
| 	if (ret && ret != -EAGAIN) { | 	if (ret) { | ||||||
| 		mlog_errno(ret); |  | ||||||
| 		goto out_quota; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock | 		 * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock | ||||||
| 		 * the target page. In this case, we exit with no error and no target | 		 * the target page. In this case, we exit with no error and no target | ||||||
| 		 * page. This will trigger the caller, page_mkwrite(), to re-try | 		 * page. This will trigger the caller, page_mkwrite(), to re-try | ||||||
| 		 * the operation. | 		 * the operation. | ||||||
| 		 */ | 		 */ | ||||||
| 	if (ret == -EAGAIN) { | 		if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) { | ||||||
| 			BUG_ON(wc->w_target_page); | 			BUG_ON(wc->w_target_page); | ||||||
| 			ret = 0; | 			ret = 0; | ||||||
| 			goto out_quota; | 			goto out_quota; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | 		mlog_errno(ret); | ||||||
|  | 		goto out_quota; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, | 	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, | ||||||
| 					  len); | 					  len); | ||||||
| 	if (ret) { | 	if (ret) { | ||||||
|  |  | ||||||
|  | @ -120,7 +120,8 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | ||||||
| 	define_mask(KTHREAD), | 	define_mask(KTHREAD), | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; | static struct attribute *mlog_default_attrs[MLOG_MAX_BITS] = {NULL, }; | ||||||
|  | ATTRIBUTE_GROUPS(mlog_default); | ||||||
| 
 | 
 | ||||||
| static ssize_t mlog_show(struct kobject *obj, struct attribute *attr, | static ssize_t mlog_show(struct kobject *obj, struct attribute *attr, | ||||||
| 			 char *buf) | 			 char *buf) | ||||||
|  | @ -144,7 +145,7 @@ static const struct sysfs_ops mlog_attr_ops = { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct kobj_type mlog_ktype = { | static struct kobj_type mlog_ktype = { | ||||||
| 	.default_attrs = mlog_attr_ptrs, | 	.default_groups = mlog_default_groups, | ||||||
| 	.sysfs_ops      = &mlog_attr_ops, | 	.sysfs_ops      = &mlog_attr_ops, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -157,10 +158,10 @@ int mlog_sys_init(struct kset *o2cb_kset) | ||||||
| 	int i = 0; | 	int i = 0; | ||||||
| 
 | 
 | ||||||
| 	while (mlog_attrs[i].attr.mode) { | 	while (mlog_attrs[i].attr.mode) { | ||||||
| 		mlog_attr_ptrs[i] = &mlog_attrs[i].attr; | 		mlog_default_attrs[i] = &mlog_attrs[i].attr; | ||||||
| 		i++; | 		i++; | ||||||
| 	} | 	} | ||||||
| 	mlog_attr_ptrs[i] = NULL; | 	mlog_default_attrs[i] = NULL; | ||||||
| 
 | 
 | ||||||
| 	kobject_set_name(&mlog_kset.kobj, "logmask"); | 	kobject_set_name(&mlog_kset.kobj, "logmask"); | ||||||
| 	mlog_kset.kobj.kset = o2cb_kset; | 	mlog_kset.kobj.kset = o2cb_kset; | ||||||
|  |  | ||||||
|  | @ -3343,7 +3343,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, | ||||||
| 	struct ocfs2_dir_entry *de, *last_de = NULL; | 	struct ocfs2_dir_entry *de, *last_de = NULL; | ||||||
| 	char *de_buf, *limit; | 	char *de_buf, *limit; | ||||||
| 	unsigned long offset = 0; | 	unsigned long offset = 0; | ||||||
| 	unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize; | 	unsigned int rec_len, new_rec_len, free_space; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * This calculates how many free bytes we'd have in block zero, should | 	 * This calculates how many free bytes we'd have in block zero, should | ||||||
|  |  | ||||||
|  | @ -94,6 +94,7 @@ static struct attribute *ocfs2_filecheck_attrs[] = { | ||||||
| 	&ocfs2_filecheck_attr_set.attr, | 	&ocfs2_filecheck_attr_set.attr, | ||||||
| 	NULL | 	NULL | ||||||
| }; | }; | ||||||
|  | ATTRIBUTE_GROUPS(ocfs2_filecheck); | ||||||
| 
 | 
 | ||||||
| static void ocfs2_filecheck_release(struct kobject *kobj) | static void ocfs2_filecheck_release(struct kobject *kobj) | ||||||
| { | { | ||||||
|  | @ -138,7 +139,7 @@ static const struct sysfs_ops ocfs2_filecheck_ops = { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct kobj_type ocfs2_ktype_filecheck = { | static struct kobj_type ocfs2_ktype_filecheck = { | ||||||
| 	.default_attrs = ocfs2_filecheck_attrs, | 	.default_groups = ocfs2_filecheck_groups, | ||||||
| 	.sysfs_ops = &ocfs2_filecheck_ops, | 	.sysfs_ops = &ocfs2_filecheck_ops, | ||||||
| 	.release = ocfs2_filecheck_release, | 	.release = ocfs2_filecheck_release, | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -1669,8 +1669,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | ||||||
| 	status = jbd2_journal_load(journal); | 	status = jbd2_journal_load(journal); | ||||||
| 	if (status < 0) { | 	if (status < 0) { | ||||||
| 		mlog_errno(status); | 		mlog_errno(status); | ||||||
| 		if (!igrab(inode)) | 		BUG_ON(!igrab(inode)); | ||||||
| 			BUG(); |  | ||||||
| 		jbd2_journal_destroy(journal); | 		jbd2_journal_destroy(journal); | ||||||
| 		goto done; | 		goto done; | ||||||
| 	} | 	} | ||||||
|  | @ -1699,8 +1698,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | ||||||
| 	if (status < 0) | 	if (status < 0) | ||||||
| 		mlog_errno(status); | 		mlog_errno(status); | ||||||
| 
 | 
 | ||||||
| 	if (!igrab(inode)) | 	BUG_ON(!igrab(inode)); | ||||||
| 		BUG(); |  | ||||||
| 
 | 
 | ||||||
| 	jbd2_journal_destroy(journal); | 	jbd2_journal_destroy(journal); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,6 +1,7 @@ | ||||||
| // SPDX-License-Identifier: GPL-2.0
 | // SPDX-License-Identifier: GPL-2.0
 | ||||||
| #include <linux/pagewalk.h> | #include <linux/pagewalk.h> | ||||||
| #include <linux/vmacache.h> | #include <linux/vmacache.h> | ||||||
|  | #include <linux/mm_inline.h> | ||||||
| #include <linux/hugetlb.h> | #include <linux/hugetlb.h> | ||||||
| #include <linux/huge_mm.h> | #include <linux/huge_mm.h> | ||||||
| #include <linux/mount.h> | #include <linux/mount.h> | ||||||
|  | @ -308,6 +309,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | ||||||
| 
 | 
 | ||||||
| 	name = arch_vma_name(vma); | 	name = arch_vma_name(vma); | ||||||
| 	if (!name) { | 	if (!name) { | ||||||
|  | 		const char *anon_name; | ||||||
|  | 
 | ||||||
| 		if (!mm) { | 		if (!mm) { | ||||||
| 			name = "[vdso]"; | 			name = "[vdso]"; | ||||||
| 			goto done; | 			goto done; | ||||||
|  | @ -319,8 +322,16 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if (is_stack(vma)) | 		if (is_stack(vma)) { | ||||||
| 			name = "[stack]"; | 			name = "[stack]"; | ||||||
|  | 			goto done; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		anon_name = vma_anon_name(vma); | ||||||
|  | 		if (anon_name) { | ||||||
|  | 			seq_pad(m, ' '); | ||||||
|  | 			seq_printf(m, "[anon:%s]", anon_name); | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| done: | done: | ||||||
|  |  | ||||||
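The show_map_vma() change above makes /proc/<pid>/maps print "[anon:<name>]" for anonymous mappings that have been given a name. A hedged userspace sketch of how such a name is set, assuming the PR_SET_VMA / PR_SET_VMA_ANON_NAME prctl added in the same series; the constants are spelled out in case the libc headers do not yet define them:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41
#define PR_SET_VMA_ANON_NAME	0
#endif

int main(void)
{
	size_t len = 4 * 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* On success, /proc/self/maps shows this region as "[anon:ring buffer]". */
	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (unsigned long)p, len,
		  "ring buffer"))
		perror("prctl(PR_SET_VMA)");
	memset(p, 0, len);
	return 0;
}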
|  | @ -29,6 +29,7 @@ | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/magic.h> | #include <linux/magic.h> | ||||||
| #include <linux/xattr.h> | #include <linux/xattr.h> | ||||||
|  | #include <linux/backing-dev.h> | ||||||
| 
 | 
 | ||||||
| #include "squashfs_fs.h" | #include "squashfs_fs.h" | ||||||
| #include "squashfs_fs_sb.h" | #include "squashfs_fs_sb.h" | ||||||
|  | @ -112,6 +113,24 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem( | ||||||
| 	return decompressor; | 	return decompressor; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int squashfs_bdi_init(struct super_block *sb) | ||||||
|  | { | ||||||
|  | 	int err; | ||||||
|  | 	unsigned int major = MAJOR(sb->s_dev); | ||||||
|  | 	unsigned int minor = MINOR(sb->s_dev); | ||||||
|  | 
 | ||||||
|  | 	bdi_put(sb->s_bdi); | ||||||
|  | 	sb->s_bdi = &noop_backing_dev_info; | ||||||
|  | 
 | ||||||
|  | 	err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor); | ||||||
|  | 	if (err) | ||||||
|  | 		return err; | ||||||
|  | 
 | ||||||
|  | 	sb->s_bdi->ra_pages = 0; | ||||||
|  | 	sb->s_bdi->io_pages = 0; | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) | static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) | ||||||
| { | { | ||||||
|  | @ -127,6 +146,20 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) | ||||||
| 
 | 
 | ||||||
| 	TRACE("Entered squashfs_fill_superblock\n"); | 	TRACE("Entered squashfs_fill_superblock\n"); | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * squashfs provides 'backing_dev_info' in order to disable read-ahead. For | ||||||
|  | 	 * squashfs, I/O is not deferred, it is done immediately in readpage, | ||||||
|  | 	 * which means the user would always have to wait for their own I/O. So the effect | ||||||
|  | 	 * of readahead is very weak for squashfs. squashfs_bdi_init will set | ||||||
|  | 	 * sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and disable readahead for | ||||||
|  | 	 * squashfs. | ||||||
|  | 	 */ | ||||||
|  | 	err = squashfs_bdi_init(sb); | ||||||
|  | 	if (err) { | ||||||
|  | 		errorf(fc, "squashfs init bdi failed"); | ||||||
|  | 		return err; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); | 	sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); | ||||||
| 	if (sb->s_fs_info == NULL) { | 	if (sb->s_fs_info == NULL) { | ||||||
| 		ERROR("Failed to allocate squashfs_sb_info\n"); | 		ERROR("Failed to allocate squashfs_sb_info\n"); | ||||||
|  |  | ||||||
|  | @ -15,6 +15,7 @@ | ||||||
| #include <linux/sched/signal.h> | #include <linux/sched/signal.h> | ||||||
| #include <linux/sched/mm.h> | #include <linux/sched/mm.h> | ||||||
| #include <linux/mm.h> | #include <linux/mm.h> | ||||||
|  | #include <linux/mm_inline.h> | ||||||
| #include <linux/mmu_notifier.h> | #include <linux/mmu_notifier.h> | ||||||
| #include <linux/poll.h> | #include <linux/poll.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
|  | @ -877,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) | ||||||
| 				 new_flags, vma->anon_vma, | 				 new_flags, vma->anon_vma, | ||||||
| 				 vma->vm_file, vma->vm_pgoff, | 				 vma->vm_file, vma->vm_pgoff, | ||||||
| 				 vma_policy(vma), | 				 vma_policy(vma), | ||||||
| 				 NULL_VM_UFFD_CTX); | 				 NULL_VM_UFFD_CTX, vma_anon_name(vma)); | ||||||
| 		if (prev) | 		if (prev) | ||||||
| 			vma = prev; | 			vma = prev; | ||||||
| 		else | 		else | ||||||
|  | @ -1436,7 +1437,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, | ||||||
| 		prev = vma_merge(mm, prev, start, vma_end, new_flags, | 		prev = vma_merge(mm, prev, start, vma_end, new_flags, | ||||||
| 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff, | 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff, | ||||||
| 				 vma_policy(vma), | 				 vma_policy(vma), | ||||||
| 				 ((struct vm_userfaultfd_ctx){ ctx })); | 				 ((struct vm_userfaultfd_ctx){ ctx }), | ||||||
|  | 				 vma_anon_name(vma)); | ||||||
| 		if (prev) { | 		if (prev) { | ||||||
| 			vma = prev; | 			vma = prev; | ||||||
| 			goto next; | 			goto next; | ||||||
|  | @ -1613,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, | ||||||
| 		prev = vma_merge(mm, prev, start, vma_end, new_flags, | 		prev = vma_merge(mm, prev, start, vma_end, new_flags, | ||||||
| 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff, | 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff, | ||||||
| 				 vma_policy(vma), | 				 vma_policy(vma), | ||||||
| 				 NULL_VM_UFFD_CTX); | 				 NULL_VM_UFFD_CTX, vma_anon_name(vma)); | ||||||
| 		if (prev) { | 		if (prev) { | ||||||
| 			vma = prev; | 			vma = prev; | ||||||
| 			goto next; | 			goto next; | ||||||
|  |  | ||||||
|  | @ -4,7 +4,6 @@ | ||||||
|  * All Rights Reserved. |  * All Rights Reserved. | ||||||
|  */ |  */ | ||||||
| #include "xfs.h" | #include "xfs.h" | ||||||
| #include <linux/backing-dev.h> |  | ||||||
| #include "xfs_message.h" | #include "xfs_message.h" | ||||||
| #include "xfs_trace.h" | #include "xfs_trace.h" | ||||||
| 
 | 
 | ||||||
|  | @ -26,6 +25,6 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) | ||||||
| 	"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)", | 	"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)", | ||||||
| 				current->comm, current->pid, | 				current->comm, current->pid, | ||||||
| 				(unsigned int)size, __func__, lflags); | 				(unsigned int)size, __func__, lflags); | ||||||
| 		congestion_wait(BLK_RW_ASYNC, HZ/50); | 		memalloc_retry_wait(lflags); | ||||||
| 	} while (1); | 	} while (1); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -394,7 +394,7 @@ xfs_buf_alloc_pages( | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		XFS_STATS_INC(bp->b_mount, xb_page_retries); | 		XFS_STATS_INC(bp->b_mount, xb_page_retries); | ||||||
| 		congestion_wait(BLK_RW_ASYNC, HZ / 50); | 		memalloc_retry_wait(gfp_mask); | ||||||
| 	} | 	} | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -295,7 +295,6 @@ extern bool libceph_compatible(void *data); | ||||||
| 
 | 
 | ||||||
| extern const char *ceph_msg_type_name(int type); | extern const char *ceph_msg_type_name(int type); | ||||||
| extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||||||
| extern void *ceph_kvmalloc(size_t size, gfp_t flags); |  | ||||||
| 
 | 
 | ||||||
| struct fs_parameter; | struct fs_parameter; | ||||||
| struct fc_log; | struct fc_log; | ||||||
|  |  | ||||||
|  | @ -11,12 +11,19 @@ | ||||||
| #include <linux/mutex.h> | #include <linux/mutex.h> | ||||||
| #include <linux/time64.h> | #include <linux/time64.h> | ||||||
| #include <linux/types.h> | #include <linux/types.h> | ||||||
|  | #include <linux/random.h> | ||||||
| 
 | 
 | ||||||
| /* Minimal region size.  Every damon_region is aligned by this. */ | /* Minimal region size.  Every damon_region is aligned by this. */ | ||||||
| #define DAMON_MIN_REGION	PAGE_SIZE | #define DAMON_MIN_REGION	PAGE_SIZE | ||||||
| /* Max priority score for DAMON-based operation schemes */ | /* Max priority score for DAMON-based operation schemes */ | ||||||
| #define DAMOS_MAX_SCORE		(99) | #define DAMOS_MAX_SCORE		(99) | ||||||
| 
 | 
 | ||||||
|  | /* Get a random number in [l, r) */ | ||||||
|  | static inline unsigned long damon_rand(unsigned long l, unsigned long r) | ||||||
|  | { | ||||||
|  | 	return l + prandom_u32_max(r - l); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * struct damon_addr_range - Represents an address region of [@start, @end). |  * struct damon_addr_range - Represents an address region of [@start, @end). | ||||||
|  * @start:	Start address of the region (inclusive). |  * @start:	Start address of the region (inclusive). | ||||||
|  | @ -185,6 +192,22 @@ struct damos_watermarks { | ||||||
| 	bool activated; | 	bool activated; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | /**
 | ||||||
|  |  * struct damos_stat - Statistics on a given scheme. | ||||||
|  |  * @nr_tried:	Total number of regions that the scheme has been tried on. | ||||||
|  |  * @sz_tried:	Total size of regions that the scheme has been tried on. | ||||||
|  |  * @nr_applied:	Total number of regions that the scheme has been applied to. | ||||||
|  |  * @sz_applied:	Total size of regions that the scheme has been applied to. | ||||||
|  |  * @qt_exceeds: Total number of times the scheme's quota has been exceeded. | ||||||
|  |  */ | ||||||
|  | struct damos_stat { | ||||||
|  | 	unsigned long nr_tried; | ||||||
|  | 	unsigned long sz_tried; | ||||||
|  | 	unsigned long nr_applied; | ||||||
|  | 	unsigned long sz_applied; | ||||||
|  | 	unsigned long qt_exceeds; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * struct damos - Represents a Data Access Monitoring-based Operation Scheme. |  * struct damos - Represents a Data Access Monitoring-based Operation Scheme. | ||||||
|  * @min_sz_region:	Minimum size of target regions. |  * @min_sz_region:	Minimum size of target regions. | ||||||
|  | @ -196,8 +219,7 @@ struct damos_watermarks { | ||||||
|  * @action:		&damo_action to be applied to the target regions. |  * @action:		&damo_action to be applied to the target regions. | ||||||
|  * @quota:		Control the aggressiveness of this scheme. |  * @quota:		Control the aggressiveness of this scheme. | ||||||
|  * @wmarks:		Watermarks for automated (in)activation of this scheme. |  * @wmarks:		Watermarks for automated (in)activation of this scheme. | ||||||
|  * @stat_count:		Total number of regions that this scheme is applied. |  * @stat:		Statistics of this scheme. | ||||||
|  * @stat_sz:		Total size of regions that this scheme is applied. |  | ||||||
|  * @list:		List head for siblings. |  * @list:		List head for siblings. | ||||||
|  * |  * | ||||||
|  * For each aggregation interval, DAMON finds regions which fit in the |  * For each aggregation interval, DAMON finds regions which fit in the | ||||||
|  | @ -228,8 +250,7 @@ struct damos { | ||||||
| 	enum damos_action action; | 	enum damos_action action; | ||||||
| 	struct damos_quota quota; | 	struct damos_quota quota; | ||||||
| 	struct damos_watermarks wmarks; | 	struct damos_watermarks wmarks; | ||||||
| 	unsigned long stat_count; | 	struct damos_stat stat; | ||||||
| 	unsigned long stat_sz; |  | ||||||
| 	struct list_head list; | 	struct list_head list; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -274,7 +295,8 @@ struct damon_ctx; | ||||||
|  * as an integer in [0, &DAMOS_MAX_SCORE]. |  * as an integer in [0, &DAMOS_MAX_SCORE]. | ||||||
|  * @apply_scheme is called from @kdamond when a region for user provided |  * @apply_scheme is called from @kdamond when a region for user provided | ||||||
|  * DAMON-based operation scheme is found.  It should apply the scheme's action |  * DAMON-based operation scheme is found.  It should apply the scheme's action | ||||||
|  * to the region.  This is not used for &DAMON_ARBITRARY_TARGET case. |  * to the region and return the number of bytes of the region to which the | ||||||
|  |  * action was successfully applied. | ||||||
|  * @target_valid should check whether the target is still valid for the |  * @target_valid should check whether the target is still valid for the | ||||||
|  * monitoring. |  * monitoring. | ||||||
|  * @cleanup is called from @kdamond just before its termination. |  * @cleanup is called from @kdamond just before its termination. | ||||||
|  | @ -288,8 +310,9 @@ struct damon_primitive { | ||||||
| 	int (*get_scheme_score)(struct damon_ctx *context, | 	int (*get_scheme_score)(struct damon_ctx *context, | ||||||
| 			struct damon_target *t, struct damon_region *r, | 			struct damon_target *t, struct damon_region *r, | ||||||
| 			struct damos *scheme); | 			struct damos *scheme); | ||||||
| 	int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, | 	unsigned long (*apply_scheme)(struct damon_ctx *context, | ||||||
| 			struct damon_region *r, struct damos *scheme); | 			struct damon_target *t, struct damon_region *r, | ||||||
|  | 			struct damos *scheme); | ||||||
| 	bool (*target_valid)(void *target); | 	bool (*target_valid)(void *target); | ||||||
| 	void (*cleanup)(struct damon_ctx *context); | 	void (*cleanup)(struct damon_ctx *context); | ||||||
| }; | }; | ||||||
|  | @ -392,14 +415,20 @@ struct damon_ctx { | ||||||
| 	struct list_head schemes; | 	struct list_head schemes; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define damon_next_region(r) \ | static inline struct damon_region *damon_next_region(struct damon_region *r) | ||||||
| 	(container_of(r->list.next, struct damon_region, list)) | { | ||||||
|  | 	return container_of(r->list.next, struct damon_region, list); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| #define damon_prev_region(r) \ | static inline struct damon_region *damon_prev_region(struct damon_region *r) | ||||||
| 	(container_of(r->list.prev, struct damon_region, list)) | { | ||||||
|  | 	return container_of(r->list.prev, struct damon_region, list); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| #define damon_last_region(t) \ | static inline struct damon_region *damon_last_region(struct damon_target *t) | ||||||
| 	(list_last_entry(&t->regions_list, struct damon_region, list)) | { | ||||||
|  | 	return list_last_entry(&t->regions_list, struct damon_region, list); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| #define damon_for_each_region(r, t) \ | #define damon_for_each_region(r, t) \ | ||||||
| 	list_for_each_entry(r, &t->regions_list, list) | 	list_for_each_entry(r, &t->regions_list, list) | ||||||
|  | @ -422,9 +451,18 @@ struct damon_ctx { | ||||||
| #ifdef CONFIG_DAMON | #ifdef CONFIG_DAMON | ||||||
| 
 | 
 | ||||||
| struct damon_region *damon_new_region(unsigned long start, unsigned long end); | struct damon_region *damon_new_region(unsigned long start, unsigned long end); | ||||||
| inline void damon_insert_region(struct damon_region *r, | 
 | ||||||
|  | /*
 | ||||||
|  |  * Add a region between two other regions | ||||||
|  |  */ | ||||||
|  | static inline void damon_insert_region(struct damon_region *r, | ||||||
| 		struct damon_region *prev, struct damon_region *next, | 		struct damon_region *prev, struct damon_region *next, | ||||||
| 		struct damon_target *t); | 		struct damon_target *t) | ||||||
|  | { | ||||||
|  | 	__list_add(&r->list, &prev->list, &next->list); | ||||||
|  | 	t->nr_regions++; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void damon_add_region(struct damon_region *r, struct damon_target *t); | void damon_add_region(struct damon_region *r, struct damon_target *t); | ||||||
| void damon_destroy_region(struct damon_region *r, struct damon_target *t); | void damon_destroy_region(struct damon_region *r, struct damon_target *t); | ||||||
| 
 | 
 | ||||||
|  | @ -461,34 +499,13 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); | ||||||
| #endif	/* CONFIG_DAMON */ | #endif	/* CONFIG_DAMON */ | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_DAMON_VADDR | #ifdef CONFIG_DAMON_VADDR | ||||||
| 
 |  | ||||||
| /* Monitoring primitives for virtual memory address spaces */ |  | ||||||
| void damon_va_init(struct damon_ctx *ctx); |  | ||||||
| void damon_va_update(struct damon_ctx *ctx); |  | ||||||
| void damon_va_prepare_access_checks(struct damon_ctx *ctx); |  | ||||||
| unsigned int damon_va_check_accesses(struct damon_ctx *ctx); |  | ||||||
| bool damon_va_target_valid(void *t); | bool damon_va_target_valid(void *t); | ||||||
| void damon_va_cleanup(struct damon_ctx *ctx); |  | ||||||
| int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t, |  | ||||||
| 		struct damon_region *r, struct damos *scheme); |  | ||||||
| int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t, |  | ||||||
| 		struct damon_region *r, struct damos *scheme); |  | ||||||
| void damon_va_set_primitives(struct damon_ctx *ctx); | void damon_va_set_primitives(struct damon_ctx *ctx); | ||||||
| 
 |  | ||||||
| #endif	/* CONFIG_DAMON_VADDR */ | #endif	/* CONFIG_DAMON_VADDR */ | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_DAMON_PADDR | #ifdef CONFIG_DAMON_PADDR | ||||||
| 
 |  | ||||||
| /* Monitoring primitives for the physical memory address space */ |  | ||||||
| void damon_pa_prepare_access_checks(struct damon_ctx *ctx); |  | ||||||
| unsigned int damon_pa_check_accesses(struct damon_ctx *ctx); |  | ||||||
| bool damon_pa_target_valid(void *t); | bool damon_pa_target_valid(void *t); | ||||||
| int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t, |  | ||||||
| 		struct damon_region *r, struct damos *scheme); |  | ||||||
| int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t, |  | ||||||
| 		struct damon_region *r, struct damos *scheme); |  | ||||||
| void damon_pa_set_primitives(struct damon_ctx *ctx); | void damon_pa_set_primitives(struct damon_ctx *ctx); | ||||||
| 
 |  | ||||||
| #endif	/* CONFIG_DAMON_PADDR */ | #endif	/* CONFIG_DAMON_PADDR */ | ||||||
| 
 | 
 | ||||||
| #endif	/* _DAMON_H */ | #endif	/* _DAMON_H */ | ||||||
|  |  | ||||||
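With damon_next_region(), damon_prev_region() and damon_last_region() now plain static inlines and damon_insert_region() fully defined in the header, other kernel code can walk a target's regions without pulling in DAMON internals. A small hedged sketch; the helper itself is illustrative and not part of this diff:

#include <linux/damon.h>

/* Illustrative helper: count how many regions a monitoring target currently has. */
static unsigned int count_damon_regions(struct damon_target *t)
{
	struct damon_region *r;
	unsigned int nr = 0;

	damon_for_each_region(r, t)
		nr++;
	return nr;
}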
|  | @ -3093,6 +3093,7 @@ extern void unlock_new_inode(struct inode *); | ||||||
| extern void discard_new_inode(struct inode *); | extern void discard_new_inode(struct inode *); | ||||||
| extern unsigned int get_next_ino(void); | extern unsigned int get_next_ino(void); | ||||||
| extern void evict_inodes(struct super_block *sb); | extern void evict_inodes(struct super_block *sb); | ||||||
|  | void dump_mapping(const struct address_space *); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Userspace may rely on the inode number being non-zero. For example, glibc |  * Userspace may rely on the inode number being non-zero. For example, glibc |
|  |  | ||||||
|  | @ -302,7 +302,9 @@ struct vm_area_struct; | ||||||
|  * lowest zone as a type of emergency reserve. |  * lowest zone as a type of emergency reserve. | ||||||
|  * |  * | ||||||
|  * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit |  * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit | ||||||
|  * address. |  * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory | ||||||
|  |  * because the DMA32 kmalloc cache array is not implemented. | ||||||
|  |  * (Reason: there is no such user in kernel). | ||||||
|  * |  * | ||||||
|  * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, |  * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, | ||||||
|  * do not need to be directly accessible by the kernel but that cannot |  * do not need to be directly accessible by the kernel but that cannot | ||||||
|  | @ -598,9 +600,9 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order); | ||||||
| struct folio *folio_alloc(gfp_t gfp, unsigned order); | struct folio *folio_alloc(gfp_t gfp, unsigned order); | ||||||
| extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, | extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, | ||||||
| 			struct vm_area_struct *vma, unsigned long addr, | 			struct vm_area_struct *vma, unsigned long addr, | ||||||
| 			int node, bool hugepage); | 			bool hugepage); | ||||||
| #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ | #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ | ||||||
| 	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) | 	alloc_pages_vma(gfp_mask, order, vma, addr, true) | ||||||
| #else | #else | ||||||
| static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) | static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) | ||||||
| { | { | ||||||
|  | @ -610,14 +612,14 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) | ||||||
| { | { | ||||||
| 	return __folio_alloc_node(gfp, order, numa_node_id()); | 	return __folio_alloc_node(gfp, order, numa_node_id()); | ||||||
| } | } | ||||||
| #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ | #define alloc_pages_vma(gfp_mask, order, vma, addr, false)\ | ||||||
| 	alloc_pages(gfp_mask, order) | 	alloc_pages(gfp_mask, order) | ||||||
| #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ | #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ | ||||||
| 	alloc_pages(gfp_mask, order) | 	alloc_pages(gfp_mask, order) | ||||||
| #endif | #endif | ||||||
| #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) | #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) | ||||||
| #define alloc_page_vma(gfp_mask, vma, addr)			\ | #define alloc_page_vma(gfp_mask, vma, addr)			\ | ||||||
| 	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) | 	alloc_pages_vma(gfp_mask, 0, vma, addr, false) | ||||||
| 
 | 
 | ||||||
| extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); | extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); | ||||||
| extern unsigned long get_zeroed_page(gfp_t gfp_mask); | extern unsigned long get_zeroed_page(gfp_t gfp_mask); | ||||||
|  |  | ||||||
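The GFP_DMA32 note added near the top of this gfp.h hunk points out that kmalloc(..., GFP_DMA32) does not return memory from ZONE_DMA32, because no DMA32 kmalloc caches exist. A hedged sketch of the page-allocator route that does honor the flag, for illustration only (assumes a configuration with ZONE_DMA32):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Allocate 2^order pages from ZONE_DMA32 (below 4 GiB) and return their kernel address. */
static void *alloc_below_4g(unsigned int order)
{
	struct page *page = alloc_pages(GFP_KERNEL | GFP_DMA32, order);

	return page ? page_address(page) : NULL;	/* free with __free_pages() when done */
}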
|  | @ -622,8 +622,8 @@ struct hstate { | ||||||
| #endif | #endif | ||||||
| #ifdef CONFIG_CGROUP_HUGETLB | #ifdef CONFIG_CGROUP_HUGETLB | ||||||
| 	/* cgroup control files */ | 	/* cgroup control files */ | ||||||
| 	struct cftype cgroup_files_dfl[7]; | 	struct cftype cgroup_files_dfl[8]; | ||||||
| 	struct cftype cgroup_files_legacy[9]; | 	struct cftype cgroup_files_legacy[10]; | ||||||
| #endif | #endif | ||||||
| 	char name[HSTATE_NAME_LEN]; | 	char name[HSTATE_NAME_LEN]; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -36,6 +36,11 @@ enum hugetlb_memory_event { | ||||||
| 	HUGETLB_NR_MEMORY_EVENTS, | 	HUGETLB_NR_MEMORY_EVENTS, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct hugetlb_cgroup_per_node { | ||||||
|  | 	/* hugetlb usage in pages over all hstates. */ | ||||||
|  | 	unsigned long usage[HUGE_MAX_HSTATE]; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct hugetlb_cgroup { | struct hugetlb_cgroup { | ||||||
| 	struct cgroup_subsys_state css; | 	struct cgroup_subsys_state css; | ||||||
| 
 | 
 | ||||||
|  | @ -57,6 +62,8 @@ struct hugetlb_cgroup { | ||||||
| 
 | 
 | ||||||
| 	/* Handle for "hugetlb.events.local" */ | 	/* Handle for "hugetlb.events.local" */ | ||||||
| 	struct cgroup_file events_local_file[HUGE_MAX_HSTATE]; | 	struct cgroup_file events_local_file[HUGE_MAX_HSTATE]; | ||||||
|  | 
 | ||||||
|  | 	struct hugetlb_cgroup_per_node *nodeinfo[]; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static inline struct hugetlb_cgroup * | static inline struct hugetlb_cgroup * | ||||||
|  |  | ||||||
Some files were not shown because too many files have changed in this diff.