Subject: irqwork: push most work into softirq context
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 23 Jun 2015 15:32:51 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.10-rt8.tar.xz

Initially we deferred all irqwork into softirq context because we did
not want the latency spikes if perf or another user was busy and
delayed the RT task. The NOHZ trigger (nohz_full_kick_work) was the
first user that did not work as expected if it did not run in the
original irqwork context, so we had to bring that context back somehow
for it. push_irq_work_func is the second user that requires this.

This patch adds IRQ_WORK_HARD_IRQ, which makes sure the callback runs
in raw-irq (hard interrupt) context. Everything else is deferred into
softirq context. Without -RT we keep the original behavior.

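As an illustration (not part of the patch itself), opting a work item
into hard interrupt context then looks like the users converted below;
the callback name my_hard_func is made up:

	#include <linux/irq_work.h>

	/* Hypothetical callback: it runs with interrupts disabled even
	 * on -RT, so it must neither sleep nor take sleeping locks.
	 */
	static void my_hard_func(struct irq_work *work)
	{
	}

	static struct irq_work my_work = {
		.func	= my_hard_func,
		.flags	= IRQ_WORK_HARD_IRQ,	/* no softirq deferral on -RT */
	};

	/* Queueing itself is unchanged: irq_work_queue(&my_work); */
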
This patch incorporates tglx's original work, reworked a little to
bring back arch_irq_work_raise() where possible, and a few fixes from
Steven Rostedt and Mike Galbraith.

[bigeasy: melt tglx's irq_work_tick_soft(), which splits
irq_work_tick() into a hard and a soft variant]

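The list selection can be restated as a stand-alone sketch (plain C,
compilable on its own; it mirrors the condition used in
irq_work_queue() below, it is not kernel code):

	#include <stdbool.h>

	#define IRQ_WORK_LAZY		(1 << 2)
	#define IRQ_WORK_HARD_IRQ	(1 << 3)

	/* True if the work goes to the softirq-run lazy_list,
	 * false if it stays on the hard-irq raised_list.
	 */
	static bool uses_lazy_list(unsigned long flags, bool rt_full)
	{
		return (flags & IRQ_WORK_LAZY) ||
		       (rt_full && !(flags & IRQ_WORK_HARD_IRQ));
	}
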
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/irq_work.h |    8 ++++++
 kernel/irq_work.c        |   60 ++++++++++++++++++++++++++++++++++++-----------
 kernel/rcu/tree.c        |    1
 kernel/sched/topology.c  |    1
 kernel/time/tick-sched.c |    1
 kernel/time/timer.c      |    2 +
 6 files changed, 60 insertions(+), 13 deletions(-)

--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -18,6 +18,8 @@
 
 /* Doesn't want IPI, wait for tick: */
 #define IRQ_WORK_LAZY		BIT(2)
+/* Run hard IRQ context, even on RT */
+#define IRQ_WORK_HARD_IRQ	BIT(3)
 
 #define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
 
@@ -52,4 +54,10 @@ static inline bool irq_work_needs_cpu(vo
 static inline void irq_work_run(void) { }
 #endif
 
+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
+void irq_work_tick_soft(void);
+#else
+static inline void irq_work_tick_soft(void) { }
+#endif
+
 #endif /* _LINUX_IRQ_WORK_H */
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -17,6 +17,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/interrupt.h>
 #include <asm/processor.h>
 
 
@@ -64,6 +65,8 @@ void __weak arch_irq_work_raise(void)
  */
 bool irq_work_queue_on(struct irq_work *work, int cpu)
 {
+	struct llist_head *list;
+
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(cpu));
 
@@ -76,7 +79,12 @@ bool irq_work_queue_on(struct irq_work *
 	if (!irq_work_claim(work))
 		return false;
 
-	if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
+		list = &per_cpu(lazy_list, cpu);
+	else
+		list = &per_cpu(raised_list, cpu);
+
+	if (llist_add(&work->llnode, list))
 		arch_send_call_function_single_ipi(cpu);
 
 #else /* #ifdef CONFIG_SMP */
@@ -89,6 +97,9 @@ bool irq_work_queue_on(struct irq_work *
 /* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
+	struct llist_head *list;
+	bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
+
 	/* Only queue if not already pending */
 	if (!irq_work_claim(work))
 		return false;
@@ -96,13 +107,15 @@ bool irq_work_queue(struct irq_work *wor
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	/* If the work is "lazy", handle it from next tick if any */
-	if (work->flags & IRQ_WORK_LAZY) {
-		if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
-		    tick_nohz_tick_stopped())
-			arch_irq_work_raise();
-	} else {
-		if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
+	lazy_work = work->flags & IRQ_WORK_LAZY;
+
+	if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
+		list = this_cpu_ptr(&lazy_list);
+	else
+		list = this_cpu_ptr(&raised_list);
+
+	if (llist_add(&work->llnode, list)) {
+		if (!lazy_work || tick_nohz_tick_stopped())
 			arch_irq_work_raise();
 	}
 
@@ -119,9 +132,8 @@ bool irq_work_needs_cpu(void)
 	raised = this_cpu_ptr(&raised_list);
 	lazy = this_cpu_ptr(&lazy_list);
 
-	if (llist_empty(raised) || arch_irq_work_has_interrupt())
-		if (llist_empty(lazy))
-			return false;
+	if (llist_empty(raised) && llist_empty(lazy))
+		return false;
 
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
@@ -135,8 +147,12 @@ static void irq_work_run_list(struct lli
 	struct llist_node *llnode;
 	unsigned long flags;
 
+#ifndef CONFIG_PREEMPT_RT_FULL
+	/*
+	 * nort: On RT IRQ-work may run in SOFTIRQ context.
+	 */
 	BUG_ON(!irqs_disabled());
-
+#endif
 	if (llist_empty(list))
 		return;
 
@@ -168,7 +184,16 @@ static void irq_work_run_list(struct lli
 void irq_work_run(void)
 {
 	irq_work_run_list(this_cpu_ptr(&raised_list));
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
+		/*
+		 * NOTE: we raise softirq via IPI for safety,
+		 * and execute in irq_work_tick() to move the
+		 * overhead from hard to soft irq context.
+		 */
+		if (!llist_empty(this_cpu_ptr(&lazy_list)))
+			raise_softirq(TIMER_SOFTIRQ);
+	} else
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
@@ -178,8 +203,17 @@ void irq_work_tick(void)
 
 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
 		irq_work_run_list(raised);
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
+}
+
+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
+void irq_work_tick_soft(void)
+{
 	irq_work_run_list(this_cpu_ptr(&lazy_list));
 }
+#endif
 
 /*
  * Synchronize against the irq_work @entry, ensures the entry is not
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1296,6 +1296,7 @@ static int rcu_implicit_dynticks_qs(stru
 	    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
 	    (rnp->ffmask & rdp->grpmask)) {
 		init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
+		rdp->rcu_iw.flags = IRQ_WORK_HARD_IRQ;
 		rdp->rcu_iw_pending = true;
 		rdp->rcu_iw_gp_seq = rnp->gp_seq;
 		irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -279,6 +279,7 @@ static int init_rootdomain(struct root_d
 	rd->rto_cpu = -1;
 	raw_spin_lock_init(&rd->rto_lock);
 	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+	rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ;
 #endif
 
 	init_dl_bw(&rd->dl_bw);
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -232,6 +232,7 @@ static void nohz_full_kick_func(struct i
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 	.func = nohz_full_kick_func,
+	.flags = IRQ_WORK_HARD_IRQ,
 };
 
 /*
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1733,6 +1733,8 @@ static __latent_entropy void run_timer_s
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
+	irq_work_tick_soft();
+
 	__run_timers(base);
 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));