mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 02:30:34 +02:00)
block: remove legacy IO schedulers

Retain the deadline documentation, as that carries over to mq-deadline as well.

Tested-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent 404b8f5a03
commit f382fb0bce

7 changed files with 0 additions and 6025 deletions

Documentation/block/cfq-iosched.txt (deleted)
@@ -1,291 +0,0 @@
CFQ (Complete Fairness Queueing)
===============================

The main aim of CFQ scheduler is to provide a fair allocation of the disk
I/O bandwidth for all the processes which requests an I/O operation.

CFQ maintains the per process queue for the processes which request I/O
operation(synchronous requests). In case of asynchronous requests, all the
requests from all the processes are batched together according to their
process's I/O priority.

CFQ ioscheduler tunables
========================

slice_idle
----------
This specifies how long CFQ should idle for next request on certain cfq queues
(for sequential workloads) and service trees (for random workloads) before
queue is expired and CFQ selects next queue to dispatch from.

By default slice_idle is a non-zero value. That means by default we idle on
queues/service trees. This can be very helpful on highly seeky media like
single spindle SATA/SAS disks where we can cut down on overall number of
seeks and see improved throughput.

Setting slice_idle to 0 will remove all the idling on queues/service tree
level and one should see an overall improved throughput on faster storage
devices like multiple SATA/SAS disks in hardware RAID configuration. The down
side is that isolation provided from WRITES also goes down and notion of
IO priority becomes weaker.

So depending on storage and workload, it might be useful to set slice_idle=0.
In general I think for SATA/SAS disks and software RAID of SATA/SAS disks
keeping slice_idle enabled should be useful. For any configurations where
there are multiple spindles behind single LUN (Host based hardware RAID
controller or for storage arrays), setting slice_idle=0 might end up in better
throughput and acceptable latencies.

back_seek_max
-------------
This specifies, given in Kbytes, the maximum "distance" for backward seeking.
The distance is the amount of space from the current head location to the
sectors that are backward in terms of distance.

This parameter allows the scheduler to anticipate requests in the "backward"
direction and consider them as being the "next" if they are within this
distance from the current head location.

back_seek_penalty
-----------------
This parameter is used to compute the cost of backward seeking. If the
backward distance of request is just 1/back_seek_penalty from a "front"
request, then the seeking cost of two requests is considered equivalent.

So scheduler will not bias toward one or the other request (otherwise scheduler
will bias toward front request). Default value of back_seek_penalty is 2.
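
As an illustration of the weighting described above, here is a minimal
standalone sketch (not code from the removed scheduler; the function and
values are invented for the example):

/* Sketch only: weight a backward seek by back_seek_penalty when choosing
 * between a forward and a backward candidate. */
#include <stdio.h>

static long pick_next_offset(long fwd_dist, long back_dist, long back_seek_penalty)
{
	/* a backward seek "costs" back_seek_penalty times its distance */
	long back_cost = back_dist * back_seek_penalty;

	return (back_cost < fwd_dist) ? -back_dist : fwd_dist;
}

int main(void)
{
	/* with the default penalty of 2, a backward request must be less than
	 * half as far away as the forward one to be preferred */
	printf("%ld\n", pick_next_offset(100, 40, 2));	/* prints -40 */
	printf("%ld\n", pick_next_offset(100, 60, 2));	/* prints 100 */
	return 0;
}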

fifo_expire_async
-----------------
This parameter is used to set the timeout of asynchronous requests. Default
value of this is 248ms.

fifo_expire_sync
----------------
This parameter is used to set the timeout of synchronous requests. Default
value of this is 124ms. In case to favor synchronous requests over asynchronous
one, this value should be decreased relative to fifo_expire_async.

group_idle
-----------
This parameter forces idling at the CFQ group level instead of CFQ
queue level. This was introduced after a bottleneck was observed
in higher end storage due to idle on sequential queue and allow dispatch
from a single queue. The idea with this parameter is that it can be run with
slice_idle=0 and group_idle=8, so that idling does not happen on individual
queues in the group but happens overall on the group and thus still keeps the
IO controller working.
Not idling on individual queues in the group will dispatch requests from
multiple queues in the group at the same time and achieve higher throughput
on higher end storage.

Default value for this parameter is 8ms.

low_latency
-----------
This parameter is used to enable/disable the low latency mode of the CFQ
scheduler. If enabled, CFQ tries to recompute the slice time for each process
based on the target_latency set for the system. This favors fairness over
throughput. Disabling low latency (setting it to 0) ignores target latency,
allowing each process in the system to get a full time slice.

By default low latency mode is enabled.

target_latency
--------------
This parameter is used to calculate the time slice for a process if cfq's
latency mode is enabled. It will ensure that sync requests have an estimated
latency. But if sequential workload is higher(e.g. sequential read),
then to meet the latency constraints, throughput may decrease because of less
time for each process to issue I/O request before the cfq queue is switched.

Though this can be overcome by disabling the latency_mode, it may increase
the read latency for some applications. This parameter allows for changing
target_latency through the sysfs interface which can provide the balanced
throughput and read latency.

Default value for target_latency is 300ms.

slice_async
-----------
This parameter is same as of slice_sync but for asynchronous queue. The
default value is 40ms.

slice_async_rq
--------------
This parameter is used to limit the dispatching of asynchronous request to
device request queue in queue's slice time. The maximum number of request that
are allowed to be dispatched also depends upon the io priority. Default value
for this is 2.

slice_sync
----------
When a queue is selected for execution, the queues IO requests are only
executed for a certain amount of time(time_slice) before switching to another
queue. This parameter is used to calculate the time slice of synchronous
queue.

time_slice is computed using the below equation:-
time_slice = slice_sync + (slice_sync/5 * (4 - prio)). To increase the
time_slice of synchronous queue, increase the value of slice_sync. Default
value is 100ms.
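
A standalone sketch (not kernel code) of the formula just quoted, using the
default slice_sync of 100ms:

/* Sketch only: evaluate time_slice = slice_sync + (slice_sync/5 * (4 - prio))
 * for the eight I/O priority levels. */
#include <stdio.h>

int main(void)
{
	int slice_sync = 100;	/* default, in ms */
	int prio;

	for (prio = 0; prio <= 7; prio++)
		printf("prio %d -> time_slice %d ms\n",
		       prio, slice_sync + (slice_sync / 5 * (4 - prio)));
	/* prio 0 (highest) gets 180ms, prio 4 gets 100ms, prio 7 gets 40ms */
	return 0;
}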

quantum
-------
This specifies the number of request dispatched to the device queue. In a
queue's time slice, a request will not be dispatched if the number of request
in the device exceeds this parameter. This parameter is used for synchronous
request.

In case of storage with several disk, this setting can limit the parallel
processing of request. Therefore, increasing the value can improve the
performance although this can cause the latency of some I/O to increase due
to more number of requests.

CFQ Group scheduling
====================

CFQ supports blkio cgroup and has "blkio." prefixed files in each
blkio cgroup directory. It is weight-based and there are four knobs
for configuration - weight[_device] and leaf_weight[_device].
Internal cgroup nodes (the ones with children) can also have tasks in
them, so the former two configure how much proportion the cgroup as a
whole is entitled to at its parent's level while the latter two
configure how much proportion the tasks in the cgroup have compared to
its direct children.

Another way to think about it is assuming that each internal node has
an implicit leaf child node which hosts all the tasks whose weight is
configured by leaf_weight[_device]. Let's assume a blkio hierarchy
composed of five cgroups - root, A, B, AA and AB - with the following
weights where the names represent the hierarchy.

        weight leaf_weight
 root :  125    125
 A    :  500    750
 B    :  250    500
 AA   :  500    500
 AB   : 1000    500

root never has a parent making its weight is meaningless. For backward
compatibility, weight is always kept in sync with leaf_weight. B, AA
and AB have no child and thus its tasks have no children cgroup to
compete with. They always get 100% of what the cgroup won at the
parent level. Considering only the weights which matter, the hierarchy
looks like the following.

          root
       /    |   \
      A     B    leaf
     500   250   125
   /  |  \
  AA  AB  leaf
 500 1000 750

If all cgroups have active IOs and competing with each other, disk
time will be distributed like the following.

Distribution below root. The total active weight at this level is
A:500 + B:250 + C:125 = 875.

 root-leaf :   125 /  875      =~ 14%
 A         :   500 /  875      =~ 57%
 B(-leaf)  :   250 /  875      =~ 28%

A has children and further distributes its 57% among the children and
the implicit leaf node. The total active weight at this level is
AA:500 + AB:1000 + A-leaf:750 = 2250.

 A-leaf    : ( 750 / 2250) * A =~ 19%
 AA(-leaf) : ( 500 / 2250) * A =~ 12%
 AB(-leaf) : (1000 / 2250) * A =~ 25%
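
The same arithmetic as a standalone sketch (not kernel code), reproducing the
percentages above from the example weights:

/* Sketch only: share at each level = weight / total active weight at that
 * level, multiplied down the hierarchy. */
#include <stdio.h>

int main(void)
{
	double root_leaf = 125, a = 500, b = 250;	/* weights at root level */
	double a_leaf = 750, aa = 500, ab = 1000;	/* weights below A */
	double top = root_leaf + a + b;			/* 875 */
	double sub = a_leaf + aa + ab;			/* 2250 */
	double a_share = a / top;			/* ~57% of disk time */

	printf("root-leaf %.1f%%  A %.1f%%  B %.1f%%\n",
	       100 * root_leaf / top, 100 * a_share, 100 * b / top);
	printf("A-leaf %.1f%%  AA %.1f%%  AB %.1f%%\n",
	       100 * a_share * a_leaf / sub,
	       100 * a_share * aa / sub,
	       100 * a_share * ab / sub);
	return 0;
}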

CFQ IOPS Mode for group scheduling
===================================
Basic CFQ design is to provide priority based time slices. Higher priority
process gets bigger time slice and lower priority process gets smaller time
slice. Measuring time becomes harder if storage is fast and supports NCQ and
it would be better to dispatch multiple requests from multiple cfq queues in
request queue at a time. In such scenario, it is not possible to measure time
consumed by single queue accurately.

What is possible though is to measure number of requests dispatched from a
single queue and also allow dispatch from multiple cfq queue at the same time.
This effectively becomes the fairness in terms of IOPS (IO operations per
second).

If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
to IOPS mode and starts providing fairness in terms of number of requests
dispatched. Note that this mode switching takes effect only for group
scheduling. For non-cgroup users nothing should change.

CFQ IO scheduler Idling Theory
===============================
Idling on a queue is primarily about waiting for the next request to come
on same queue after completion of a request. In this process CFQ will not
dispatch requests from other cfq queues even if requests are pending there.

The rationale behind idling is that it can cut down on number of seeks
on rotational media. For example, if a process is doing dependent
sequential reads (next read will come on only after completion of previous
one), then not dispatching request from other queue should help as we
did not move the disk head and kept on dispatching sequential IO from
one queue.

CFQ has following service trees and various queues are put on these trees.

	sync-idle	sync-noidle	async

All cfq queues doing synchronous sequential IO go on to sync-idle tree.
On this tree we idle on each queue individually.

All synchronous non-sequential queues go on sync-noidle tree. Also any
synchronous write request which is not marked with REQ_IDLE goes on this
service tree. On this tree we do not idle on individual queues instead idle
on the whole group of queues or the tree. So if there are 4 queues waiting
for IO to dispatch we will idle only once last queue has dispatched the IO
and there is no more IO on this service tree.

All async writes go on async service tree. There is no idling on async
queues.

CFQ has some optimizations for SSDs and if it detects a non-rotational
media which can support higher queue depth (multiple requests at in
flight at a time), then it cuts down on idling of individual queues and
all the queues move to sync-noidle tree and only tree idle remains. This
tree idling provides isolation with buffered write queues on async tree.

FAQ
===
Q1. Why to idle at all on queues not marked with REQ_IDLE.

A1. We only do tree idle (all queues on sync-noidle tree) on queues not marked
    with REQ_IDLE. This helps in providing isolation with all the sync-idle
    queues. Otherwise in presence of many sequential readers, other
    synchronous IO might not get fair share of disk.

    For example, if there are 10 sequential readers doing IO and they get
    100ms each. If a !REQ_IDLE request comes in, it will be scheduled
    roughly after 1 second. If after completion of !REQ_IDLE request we
    do not idle, and after a couple of milli seconds a another !REQ_IDLE
    request comes in, again it will be scheduled after 1second. Repeat it
    and notice how a workload can lose its disk share and suffer due to
    multiple sequential readers.

    fsync can generate dependent IO where bunch of data is written in the
    context of fsync, and later some journaling data is written. Journaling
    data comes in only after fsync has finished its IO (atleast for ext4
    that seemed to be the case). Now if one decides not to idle on fsync
    thread due to !REQ_IDLE, then next journaling write will not get
    scheduled for another second. A process doing small fsync, will suffer
    badly in presence of multiple sequential readers.

    Hence doing tree idling on threads using !REQ_IDLE flag on requests
    provides isolation from multiple sequential readers and at the same
    time we do not idle on individual threads.
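
The back-of-the-envelope arithmetic in the example above, as a standalone
sketch (not kernel code):

/* Sketch only: with nr_readers sync-idle queues each holding the disk for
 * slice_ms, a !REQ_IDLE submitter that is not idled on waits roughly one
 * full rotation of those queues between each of its requests. */
#include <stdio.h>

int main(void)
{
	int nr_readers = 10;	/* sequential readers from the example */
	int slice_ms = 100;	/* time slice each reader gets */

	printf("approx. wait per !REQ_IDLE request: %d ms\n",
	       nr_readers * slice_ms);	/* ~1000 ms, i.e. about a second */
	return 0;
}
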
Q2. When to specify REQ_IDLE
A2. I would think whenever one is doing synchronous write and expecting
    more writes to be dispatched from same context soon, should be able
    to specify REQ_IDLE on writes and that probably should work well for
    most of the cases.

block/Kconfig.iosched
@@ -3,67 +3,6 @@ if BLOCK
 
 menu "IO Schedulers"
 
-config IOSCHED_NOOP
-	bool
-	default y
-	---help---
-	  The no-op I/O scheduler is a minimal scheduler that does basic merging
-	  and sorting. Its main uses include non-disk based block devices like
-	  memory devices, and specialised software or hardware environments
-	  that do their own scheduling and require only minimal assistance from
-	  the kernel.
-
-config IOSCHED_DEADLINE
-	tristate "Deadline I/O scheduler"
-	default y
-	---help---
-	  The deadline I/O scheduler is simple and compact. It will provide
-	  CSCAN service with FIFO expiration of requests, switching to
-	  a new point in the service tree and doing a batch of IO from there
-	  in case of expiry.
-
-config IOSCHED_CFQ
-	tristate "CFQ I/O scheduler"
-	default y
-	---help---
-	  The CFQ I/O scheduler tries to distribute bandwidth equally
-	  among all processes in the system. It should provide a fair
-	  and low latency working environment, suitable for both desktop
-	  and server systems.
-
-	  This is the default I/O scheduler.
-
-config CFQ_GROUP_IOSCHED
-	bool "CFQ Group Scheduling support"
-	depends on IOSCHED_CFQ && BLK_CGROUP
-	---help---
-	  Enable group IO scheduling in CFQ.
-
-choice
-
-	prompt "Default I/O scheduler"
-	default DEFAULT_CFQ
-	help
-	  Select the I/O scheduler which will be used by default for all
-	  block devices.
-
-	config DEFAULT_DEADLINE
-		bool "Deadline" if IOSCHED_DEADLINE=y
-
-	config DEFAULT_CFQ
-		bool "CFQ" if IOSCHED_CFQ=y
-
-	config DEFAULT_NOOP
-		bool "No-op"
-
-endchoice
-
-config DEFAULT_IOSCHED
-	string
-	default "deadline" if DEFAULT_DEADLINE
-	default "cfq" if DEFAULT_CFQ
-	default "noop" if DEFAULT_NOOP
-
 config MQ_IOSCHED_DEADLINE
 	tristate "MQ deadline I/O scheduler"
 	default y

block/Makefile
@@ -18,9 +18,6 @@ obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
-obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
-obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
-obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
 obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o

block/cfq-iosched.c (4916 deletions)
File diff suppressed because it is too large

block/deadline-iosched.c (deleted)
@@ -1,560 +0,0 @@
/*
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
				     by the above parameters. For throughput. */

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	/*
	 * next in sort order. read, write or both are NULL
	 */
	struct request *next_rq[2];
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;
};

static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
	return &dd->sort_list[rq_data_dir(rq)];
}

/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
	struct rb_node *node = rb_next(&rq->rb_node);

	if (node)
		return rb_entry_rq(node);

	return NULL;
}

static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(dd, rq);

	elv_rb_add(root, rq);
}

static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	if (dd->next_rq[data_dir] == rq)
		dd->next_rq[data_dir] = deadline_latter_request(rq);

	elv_rb_del(deadline_rb_root(dd, rq), rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int data_dir = rq_data_dir(rq);

	/*
	 * This may be a requeue of a write request that has locked its
	 * target zone. If it is the case, this releases the zone lock.
	 */
	blk_req_zone_write_unlock(rq);

	deadline_add_rq_rb(dd, rq);

	/*
	 * set expire time and add to fifo list
	 */
	rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	rq_fifo_clear(rq);
	deadline_del_rq_rb(dd, rq);
}

static enum elv_merge
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
		sector_t sector = bio_end_sector(bio);

		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
		if (__rq) {
			BUG_ON(sector != blk_rq_pos(__rq));

			if (elv_bio_merge_ok(__rq, bio)) {
				*req = __rq;
				return ELEVATOR_FRONT_MERGE;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
}

static void deadline_merged_request(struct request_queue *q,
				    struct request *req, enum elv_merge type)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(deadline_rb_root(dd, req), req);
		deadline_add_rq_rb(dd, req);
	}
}

static void
deadline_merged_requests(struct request_queue *q, struct request *req,
			 struct request *next)
{
	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time,
				(unsigned long)req->fifo_time)) {
			list_move(&req->queuelist, &next->queuelist);
			req->fifo_time = next->fifo_time;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
	struct request_queue *q = rq->q;

	/*
	 * For a zoned block device, write requests must write lock their
	 * target zone.
	 */
	blk_req_zone_write_lock(rq);

	deadline_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;
	dd->next_rq[data_dir] = deadline_latter_request(rq);

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
	deadline_move_to_dispatch(dd, rq);
}

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

	/*
	 * rq is expired!
	 */
	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
		return 1;

	return 0;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * arrival ordered lists.
 */
static struct request *
deadline_fifo_request(struct deadline_data *dd, int data_dir)
{
	struct request *rq;

	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
		return NULL;

	if (list_empty(&dd->fifo_list[data_dir]))
		return NULL;

	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
		return rq;

	/*
	 * Look for a write request that can be dispatched, that is one with
	 * an unlocked target zone.
	 */
	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
		if (blk_req_can_dispatch_to_zone(rq))
			return rq;
	}

	return NULL;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * sector position sorted lists.
 */
static struct request *
deadline_next_request(struct deadline_data *dd, int data_dir)
{
	struct request *rq;

	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
		return NULL;

	rq = dd->next_rq[data_dir];
	if (!rq)
		return NULL;

	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
		return rq;

	/*
	 * Look for a write request that can be dispatched, that is one with
	 * an unlocked target zone.
	 */
	while (rq) {
		if (blk_req_can_dispatch_to_zone(rq))
			return rq;
		rq = deadline_latter_request(rq);
	}

	return NULL;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static int deadline_dispatch_requests(struct request_queue *q, int force)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
	struct request *rq, *next_rq;
	int data_dir;

	/*
	 * batches are currently reads XOR writes
	 */
	rq = deadline_next_request(dd, WRITE);
	if (!rq)
		rq = deadline_next_request(dd, READ);

	if (rq && dd->batching < dd->fifo_batch)
		/* we have a next request are still entitled to batch */
		goto dispatch_request;

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

		if (deadline_fifo_request(dd, WRITE) &&
		    (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	next_rq = deadline_next_request(dd, data_dir);
	if (deadline_check_fifo(dd, data_dir) || !next_rq) {
		/*
		 * A deadline has expired, the last request was in the other
		 * direction, or we have run out of higher-sectored requests.
		 * Start again from the request with the earliest expiry time.
		 */
		rq = deadline_fifo_request(dd, data_dir);
	} else {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = next_rq;
	}

	/*
	 * For a zoned block device, if we only have writes queued and none of
	 * them can be dispatched, rq will be NULL.
	 */
	if (!rq)
		return 0;

	dd->batching = 0;

dispatch_request:
	/*
	 * rq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, rq);

	return 1;
}

/*
 * For zoned block devices, write unlock the target zone of completed
 * write requests.
 */
static void
deadline_completed_request(struct request_queue *q, struct request *rq)
{
	blk_req_zone_write_unlock(rq);
}

static void deadline_exit_queue(struct elevator_queue *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static int deadline_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct deadline_data *dd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = dd;

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);
	return 0;
}

/*
 * sysfs parts below
 */

static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static void
deadline_var_store(int *var, const char *page)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	deadline_var_store(&__data, (page));				\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return count;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
};

static struct elevator_type iosched_deadline = {
	.ops.sq = {
		.elevator_merge_fn = 		deadline_merge,
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_completed_req_fn =	deadline_completed_request,
		.elevator_add_req_fn =		deadline_add_request,
		.elevator_former_req_fn =	elv_rb_former_request,
		.elevator_latter_req_fn =	elv_rb_latter_request,
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
	},

	.elevator_attrs = deadline_attrs,
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
	return elv_register(&iosched_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");
						 | 
					@ -225,8 +225,6 @@ int elevator_init(struct request_queue *q)
 | 
				
			||||||
							chosen_elevator);
 | 
												chosen_elevator);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!e)
 | 
					 | 
				
			||||||
		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
 | 
					 | 
				
			||||||
	if (!e) {
 | 
						if (!e) {
 | 
				
			||||||
		printk(KERN_ERR
 | 
							printk(KERN_ERR
 | 
				
			||||||
			"Default I/O scheduler not found. Using noop.\n");
 | 
								"Default I/O scheduler not found. Using noop.\n");
 | 
				
			||||||
| 
						 | 

@@ -356,68 +354,6 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (req_op(rq) != req_op(pos))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
			break;
		if (blk_rq_pos(rq) >= boundary) {
			if (blk_rq_pos(pos) < boundary)
				continue;
		} else {
			if (blk_rq_pos(pos) >= boundary)
				break;
		}
		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);
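The insertion loop above is terse: setting aside the early break-outs for differing op type, data direction, or already-started requests, it keeps the dispatch list ordered so that requests at or beyond the current sweep boundary (q->end_sector) come first, then requests behind it, each group in ascending sector order. A stand-alone sketch of that ordering rule follows; dispatch_before() is a hypothetical helper written only to illustrate the comparison, not kernel API.

/*
 * Illustration only: should a request at sector 'a' be dispatched before
 * one at sector 'b', given the sweep boundary (q->end_sector above)?
 * Requests on or past the boundary go first, in ascending order; requests
 * behind the boundary follow, also ascending.
 */
static inline bool dispatch_before(sector_t boundary, sector_t a, sector_t b)
{
	bool a_ahead = a >= boundary;
	bool b_ahead = b >= boundary;

	if (a_ahead != b_ahead)
		return a_ahead;		/* the side past the boundary wins */
	return a < b;			/* same side: lower sector first */
}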

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);
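As a usage sketch (hypothetical elevator, not taken from this patch): a legacy single-queue scheduler's dispatch hook hands requests to the block layer through one of the two helpers above; elv_dispatch_add_tail() additionally records the request's end sector as the boundary that later elv_dispatch_sort() calls sort against. example_data and example_pick_next() below are made-up stand-ins for a real scheduler's private state.

/*
 * Illustration only: a minimal .elevator_dispatch_fn for a hypothetical
 * legacy elevator, modelled on the deadline/noop hooks in this patch.
 */
static int example_dispatch(struct request_queue *q, int force)
{
	struct example_data *ed = q->elevator->elevator_data;
	struct request *rq = example_pick_next(ed, force);	/* hypothetical */

	if (!rq)
		return 0;

	list_del_init(&rq->queuelist);
	elv_dispatch_add_tail(q, rq);	/* also advances q->end_sector */
	return 1;
}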

enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{

@@ -881,12 +817,6 @@ int elv_register(struct elevator_type *e)
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	/* print pretty message */
	if (elevator_match(e, chosen_elevator) ||
			(!*chosen_elevator &&
			 elevator_match(e, CONFIG_DEFAULT_IOSCHED)))
				def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
						def);
	return 0;

@@ -1,124 +0,0 @@
/*
 * elevator noop
 */
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>

struct noop_data {
	struct list_head queue;
};

static void noop_merged_requests(struct request_queue *q, struct request *rq,
				 struct request *next)
{
	list_del_init(&next->queuelist);
}

static int noop_dispatch(struct request_queue *q, int force)
{
	struct noop_data *nd = q->elevator->elevator_data;
	struct request *rq;

	rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
	if (rq) {
		list_del_init(&rq->queuelist);
		elv_dispatch_sort(q, rq);
		return 1;
	}
	return 0;
}

static void noop_add_request(struct request_queue *q, struct request *rq)
{
	struct noop_data *nd = q->elevator->elevator_data;

	list_add_tail(&rq->queuelist, &nd->queue);
}

static struct request *
noop_former_request(struct request_queue *q, struct request *rq)
{
	struct noop_data *nd = q->elevator->elevator_data;

	if (rq->queuelist.prev == &nd->queue)
		return NULL;
	return list_prev_entry(rq, queuelist);
}

static struct request *
noop_latter_request(struct request_queue *q, struct request *rq)
{
	struct noop_data *nd = q->elevator->elevator_data;

	if (rq->queuelist.next == &nd->queue)
		return NULL;
	return list_next_entry(rq, queuelist);
}

static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct noop_data *nd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
	if (!nd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = nd;

	INIT_LIST_HEAD(&nd->queue);

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);
	return 0;
}

static void noop_exit_queue(struct elevator_queue *e)
{
	struct noop_data *nd = e->elevator_data;

	BUG_ON(!list_empty(&nd->queue));
	kfree(nd);
}

static struct elevator_type elevator_noop = {
	.ops.sq = {
		.elevator_merge_req_fn		= noop_merged_requests,
		.elevator_dispatch_fn		= noop_dispatch,
		.elevator_add_req_fn		= noop_add_request,
		.elevator_former_req_fn		= noop_former_request,
		.elevator_latter_req_fn		= noop_latter_request,
		.elevator_init_fn		= noop_init_queue,
		.elevator_exit_fn		= noop_exit_queue,
	},
	.elevator_name = "noop",
	.elevator_owner = THIS_MODULE,
};

static int __init noop_init(void)
{
	return elv_register(&elevator_noop);
}

static void __exit noop_exit(void)
{
	elv_unregister(&elevator_noop);
}

module_init(noop_init);
module_exit(noop_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("No-op IO scheduler");