linux/debian/patches-rt/0011-sched-fair-Robustify-C...

From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Jan 2019 13:52:31 +0100
Subject: [PATCH 011/290] sched/fair: Robustify CFS-bandwidth timer locking
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=1a1e9d89852d01e24232b177091911f075b64f65

Traditionally hrtimer callbacks were run with IRQs disabled, but with
the introduction of HRTIMER_MODE_SOFT it is possible they run from
SoftIRQ context, which does _NOT_ have IRQs disabled.

Allow for the CFS bandwidth timers (period_timer and slack_timer) to
be run from SoftIRQ context; this entails removing the assumption that
IRQs are already disabled from the locking.

While mainline doesn't strictly need this, -RT forces all timers not
explicitly marked with MODE_HARD into MODE_SOFT and trips over this.
And marking these timers as MODE_HARD doesn't make sense as they're
not required for RT operation and can potentially be quite expensive.
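
[Editor's illustration, not part of the patch: the pattern applied
throughout the diff below. A lock that may be taken both with IRQs
already disabled (traditional hrtimer callback) and from SoftIRQ
context with IRQs enabled (HRTIMER_MODE_SOFT) must save and restore
the IRQ state rather than assume it. All names here are hypothetical:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(example_lock);	/* hypothetical lock */

	static void example_timer_callback(void)	/* hypothetical callback */
	{
		unsigned long flags;

		/*
		 * raw_spin_lock_irqsave() records the IRQ state on entry
		 * in 'flags', so the critical section is correct no
		 * matter which context invoked the callback.
		 */
		raw_spin_lock_irqsave(&example_lock, flags);
		/* ... critical section ... */
		raw_spin_unlock_irqrestore(&example_lock, flags);
	}

A plain raw_spin_lock()/raw_spin_unlock() pair would re-enable IRQs
unconditionally only if paired with irq/irqrestore variants; without
saving the state, unlocking from SoftIRQ context could leave IRQs in
the wrong state for the hard-IRQ caller.]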
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reported-by: Tom Putzeys <tom.putzeys@be.atlascopco.com>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190107125231.GE14122@hirez.programming.kicks-ass.net
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[Salvatore Bonaccorso: Backport to 4.19.84 due to context changes caused by
502bd151448c ("sched/fair: Fix low cpu usage with high throttling by removing
expiration of cpu-local slices")]
---
kernel/sched/fair.c | 30 ++++++++++++++++--------------
1 file changed, 16 insertions(+), 14 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4560,7 +4560,7 @@
 		struct rq *rq = rq_of(cfs_rq);
 		struct rq_flags rf;
 
-		rq_lock(rq, &rf);
+		rq_lock_irqsave(rq, &rf);
 		if (!cfs_rq_throttled(cfs_rq))
 			goto next;
 
@@ -4579,7 +4579,7 @@
 			unthrottle_cfs_rq(cfs_rq);
 
 next:
-		rq_unlock(rq, &rf);
+		rq_unlock_irqrestore(rq, &rf);
 
 		if (!remaining)
 			break;
@@ -4595,7 +4595,7 @@
  * period the timer is deactivated until scheduling resumes; cfs_b->idle is
  * used to track this state.
  */
-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
 {
 	u64 runtime;
 	int throttled;
@@ -4635,10 +4635,10 @@
 	while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
 		runtime = cfs_b->runtime;
 		cfs_b->distribute_running = 1;
-		raw_spin_unlock(&cfs_b->lock);
+		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		/* we can't nest cfs_b->lock while distributing bandwidth */
 		runtime = distribute_cfs_runtime(cfs_b, runtime);
-		raw_spin_lock(&cfs_b->lock);
+		raw_spin_lock_irqsave(&cfs_b->lock, flags);
 		cfs_b->distribute_running = 0;
 
 		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
@@ -4746,16 +4746,17 @@
 static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 {
 	u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
+	unsigned long flags;
 
 	/* confirm we're still not at a refresh boundary */
-	raw_spin_lock(&cfs_b->lock);
+	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	if (cfs_b->distribute_running) {
-		raw_spin_unlock(&cfs_b->lock);
+		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		return;
 	}
 
 	if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
-		raw_spin_unlock(&cfs_b->lock);
+		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		return;
 	}
 
@@ -4765,17 +4766,17 @@
 	if (runtime)
 		cfs_b->distribute_running = 1;
 
-	raw_spin_unlock(&cfs_b->lock);
+	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 
 	if (!runtime)
 		return;
 
 	runtime = distribute_cfs_runtime(cfs_b, runtime);
 
-	raw_spin_lock(&cfs_b->lock);
+	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	cfs_b->runtime -= min(runtime, cfs_b->runtime);
 	cfs_b->distribute_running = 0;
-	raw_spin_unlock(&cfs_b->lock);
+	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 }
 
 /*
@@ -4855,11 +4856,12 @@
 {
 	struct cfs_bandwidth *cfs_b =
 		container_of(timer, struct cfs_bandwidth, period_timer);
+	unsigned long flags;
 	int overrun;
 	int idle = 0;
 	int count = 0;
 
-	raw_spin_lock(&cfs_b->lock);
+	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	for (;;) {
 		overrun = hrtimer_forward_now(timer, cfs_b->period);
 		if (!overrun)
@@ -4887,11 +4889,11 @@
 			count = 0;
 		}
 
-		idle = do_sched_cfs_period_timer(cfs_b, overrun);
+		idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
 	}
 	if (idle)
 		cfs_b->period_active = 0;
-	raw_spin_unlock(&cfs_b->lock);
+	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
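
[Editor's note on the hunks above, a sketch under assumed names rather
than the patch itself: do_sched_cfs_period_timer() gains an
'unsigned long flags' argument because it drops and re-takes
cfs_b->lock around distribute_cfs_runtime(), and the unlock must
restore the IRQ state that its caller saved. In general, a helper
that releases its caller's irqsave'd lock needs the caller's flags:

	/* Hypothetical helper illustrating the flags-threading pattern. */
	static void helper_drops_lock(raw_spinlock_t *lock, unsigned long flags)
	{
		/* Entered with *lock held; 'flags' is the caller's saved state. */
		raw_spin_unlock_irqrestore(lock, flags);	/* restore caller's IRQ state */
		/* ... work that must not nest under 'lock' ... */
		raw_spin_lock_irqsave(lock, flags);		/* state re-saved locally */
		/*
		 * 'flags' was passed by value, so the caller's copy is
		 * untouched and remains valid for its final
		 * raw_spin_unlock_irqrestore().
		 */
	}
]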