Delete unused rt patches
parent 5cdb1bae58
commit 54a83bb474
@@ -1,27 +0,0 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Mon, 27 Apr 2015 22:53:08 +0200
Subject: arm64: Mark PMU interrupt IRQF_NO_THREAD
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Mark the PMU interrupts as non-threadable, as is the case with
arch/arm: d9c3365 ARM: 7813/1: Mark pmu interupt IRQF_NO_THREAD

[ upstream commit: 96045ed486b0 ]

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 arch/arm64/kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -488,7 +488,7 @@ armpmu_reserve_hardware(struct arm_pmu *
 	}

 	err = request_irq(irq, armpmu->handle_irq,
-			  IRQF_NOBALANCING,
+			  IRQF_NOBALANCING | IRQF_NO_THREAD,
			  "arm-pmu", armpmu);
 	if (err) {
 		pr_err("unable to request IRQ%d for ARM PMU counters\n",
@@ -1,167 +0,0 @@
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 1 May 2015 08:27:50 -0700
Subject: sched: Implement lockless wake-queues
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This is useful for locking primitives that can effect multiple
wakeups per operation and want to avoid lock internal lock contention
by delaying the wakeups until we've released the lock internal locks.

Alternatively it can be used to avoid issuing multiple wakeups, and
thus save a few cycles, in packet processing. Queue all target tasks
and wakeup once you've processed all packets. That way you avoid
waking the target task multiple times if there were multiple packets
for the same task.

Properties of a wake_q are:
- Lockless, as queue head must reside on the stack.
- Being a queue, maintains wakeup order passed by the callers. This can
  be important in scenarios where highly contended locks could otherwise
  affect any reliance on lock fairness.
- A queued task cannot be added again until it is woken up.

This patch adds the needed infrastructure into the scheduler code
and uses the new wake_list to delay the futex wakeups until
after we've released the hash bucket locks.

[upstream commit 7675104990ed255b9315a82ae827ff312a2a88a2]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[tweaks, adjustments, comments, etc.]
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: George Spelvin <linux@horizon.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430494072-30283-2-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/sched.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c   | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -900,6 +900,50 @@ enum cpu_idle_type {
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)

 /*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function, where the fact that the queue is
+ * not used again will be easy to see by inspection.
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+struct wake_q_node {
+	struct wake_q_node *next;
+};
+
+struct wake_q_head {
+	struct wake_q_node *first;
+	struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define WAKE_Q(name)					\
+	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+extern void wake_q_add(struct wake_q_head *head,
+		       struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
+/*
  * sched-domains (multiprocessor balancing) declarations:
  */
 #ifdef CONFIG_SMP
@@ -1511,6 +1555,8 @@ struct task_struct {
 	/* Protection of the PI data structures: */
 	raw_spinlock_t pi_lock;

+	struct wake_q_node wake_q;
+
 #ifdef CONFIG_RT_MUTEXES
 	/* PI waiters blocked on a rt_mutex held by this task */
 	struct rb_root pi_waiters;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -541,6 +541,52 @@ static bool set_nr_if_polling(struct tas
 #endif
 #endif

+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+	struct wake_q_node *node = &task->wake_q;
+
+	/*
+	 * Atomically grab the task, if ->wake_q is !nil already it means
+	 * its already queued (either by us or someone else) and will get the
+	 * wakeup due to that.
+	 *
+	 * This cmpxchg() implies a full barrier, which pairs with the write
+	 * barrier implied by the wakeup in wake_up_list().
+	 */
+	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+		return;
+
+	get_task_struct(task);
+
+	/*
+	 * The head is context local, there can be no concurrency.
+	 */
+	*head->lastp = node;
+	head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+	struct wake_q_node *node = head->first;
+
+	while (node != WAKE_Q_TAIL) {
+		struct task_struct *task;
+
+		task = container_of(node, struct task_struct, wake_q);
+		BUG_ON(!task);
+		/* task can safely be re-inserted now */
+		node = node->next;
+		task->wake_q.next = NULL;
+
+		/*
+		 * wake_up_process() implies a wmb() to pair with the queueing
+		 * in wake_q_add() so as not to miss wakeups.
+		 */
+		wake_up_process(task);
+		put_task_struct(task);
+	}
+}
+
 /*
  * resched_curr - mark rq's current task 'to be rescheduled now'.
  *
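The intended calling pattern for the wake_q API above, as a minimal sketch
(the my_lock structure, its wait_lock and its waiter list are hypothetical,
for illustration only; they are not part of the deleted patch): queue the
wakeups while holding the internal lock, then issue them after the unlock.

	static void my_unlock_all(struct my_lock *lock)
	{
		struct my_waiter *w;
		WAKE_Q(wake_q);			/* head lives on this stack frame */

		spin_lock(&lock->wait_lock);
		list_for_each_entry(w, &lock->waiters, list)
			wake_q_add(&wake_q, w->task);	/* takes a task reference */
		spin_unlock(&lock->wait_lock);

		/* wakeups happen without wait_lock held; references are dropped */
		wake_up_q(&wake_q);
	}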
@@ -1,120 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:06 +0200
Subject: sched/preempt, mm/fault: Count pagefault_disable() levels in pagefault_disabled
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Until now, pagefault_disable()/pagefault_enable() used the preempt
count to track whether in an environment with pagefaults disabled (can
be queried via in_atomic()).

This patch introduces a separate counter in task_struct to count the
level of pagefault_disable() calls. We'll keep manipulating the preempt
count to retain compatibility to existing pagefault handlers.

It is now possible to verify whether in a pagefault_disable() environment
by calling pagefault_disabled(). In contrast to in_atomic() it will not
be influenced by preempt_enable()/preempt_disable().

This patch is based on a patch from Ingo Molnar.

[upstream commit 8bcbde5480f9777f8b74d71493722c663e22c21b]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 include/linux/sched.h   |  1 +
 include/linux/uaccess.h | 36 +++++++++++++++++++++++++++++-------
 kernel/fork.c           |  3 +++
 3 files changed, 33 insertions(+), 7 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1724,6 +1724,7 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long task_state_change;
 #endif
+	int pagefault_disabled;
 };

 /* Future-safe accessor for struct task_struct's cpus_allowed. */
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,20 +2,36 @@
 #define __LINUX_UACCESS_H__

 #include <linux/preempt.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>

+static __always_inline void pagefault_disabled_inc(void)
+{
+	current->pagefault_disabled++;
+}
+
+static __always_inline void pagefault_disabled_dec(void)
+{
+	current->pagefault_disabled--;
+	WARN_ON(current->pagefault_disabled < 0);
+}
+
 /*
- * These routines enable/disable the pagefault handler in that
- * it will not take any locks and go straight to the fixup table.
+ * These routines enable/disable the pagefault handler. If disabled, it will
+ * not take any locks and go straight to the fixup table.
+ *
+ * We increase the preempt and the pagefault count, to be able to distinguish
+ * whether we run in simple atomic context or in a real pagefault_disable()
+ * context.
+ *
+ * For now, after pagefault_disabled() has been called, we run in atomic
+ * context. User access methods will not sleep.
 *
- * They have great resemblance to the preempt_disable/enable calls
- * and in fact they are identical; this is because currently there is
- * no other way to make the pagefault handlers do this. So we do
- * disable preemption but we don't necessarily care about that.
 */
 static inline void pagefault_disable(void)
 {
 	preempt_count_inc();
+	pagefault_disabled_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
 	 * can hit.
@@ -25,18 +41,24 @@ static inline void pagefault_disable(voi

 static inline void pagefault_enable(void)
 {
-#ifndef CONFIG_PREEMPT
 	/*
 	 * make sure to issue those last loads/stores before enabling
 	 * the pagefault handler again.
 	 */
 	barrier();
+	pagefault_disabled_dec();
+#ifndef CONFIG_PREEMPT
 	preempt_count_dec();
 #else
 	preempt_enable();
 #endif
 }

+/*
+ * Is the pagefault handler disabled? If so, user access methods will not sleep.
+ */
+#define pagefault_disabled() (current->pagefault_disabled != 0)
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS

 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1396,6 +1396,9 @@ static struct task_struct *copy_process(
 	p->hardirq_context = 0;
 	p->softirq_context = 0;
 #endif
+
+	p->pagefault_disabled = 0;
+
 #ifdef CONFIG_LOCKDEP
 	p->lockdep_depth = 0; /* no locks held yet */
 	p->curr_chain_key = 0;
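The counting semantics introduced above can be illustrated with a short
sketch (illustrative only, not part of the patch): calls nest, and
pagefault_disabled() stays true until the outermost pagefault_enable().

	pagefault_disable();			/* counter 0 -> 1 */
	pagefault_disable();			/* counter 1 -> 2, nesting is fine */
	WARN_ON(!pagefault_disabled());		/* true */
	pagefault_enable();			/* counter 2 -> 1 */
	WARN_ON(!pagefault_disabled());		/* still true */
	pagefault_enable();			/* counter 1 -> 0 */
	WARN_ON(pagefault_disabled());		/* false again */

Unlike in_atomic(), the counter is untouched by preempt_disable()/
preempt_enable(), which is the whole point of tracking it separately.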
@@ -1,27 +0,0 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Mon, 27 Apr 2015 22:53:09 +0200
Subject: arm64: Allow forced irq threading
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Now it's safe to allow forced interrupt threading for arm64,
all timer interrupts and the perf interrupt are marked NO_THREAD, as is
the case with arch/arm: da0ec6f ARM: 7814/2: Allow forced irq threading

[ upstream commit: e8557d1f0c4d ]

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -71,6 +71,7 @@ config ARM64
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
+	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
 	select OF
@@ -1,182 +0,0 @@
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 1 May 2015 08:27:51 -0700
Subject: futex: Implement lockless wakeups
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Given the overall futex architecture, any chance of reducing
hb->lock contention is welcome. In this particular case, using
wake-queues to enable lockless wakeups addresses very much real
world performance concerns, even cases of soft-lockups in cases
of large amounts of blocked tasks (which is not hard to find in
large boxes, using just a handful of futexes).

At the lowest level, this patch can reduce latency of a single thread
attempting to acquire hb->lock in highly contended scenarios by
up to 2x. At lower counts of nr_wake there are no regressions,
confirming, of course, that the wake_q handling overhead is practically
non existent. For instance, while there is a fair amount of variation,
the extended perf-bench wakeup benchmark shows for a 20 core machine
the following avg per-thread time to wakeup its share of tasks:

	nr_thr	ms-before	ms-after
	16	0.0590		0.0215
	32	0.0396		0.0220
	48	0.0417		0.0182
	64	0.0536		0.0236
	80	0.0414		0.0097
	96	0.0672		0.0152

Naturally, this can cause spurious wakeups. However there is no core code
that cannot handle them afaict, and furthermore tglx does have the point
that other events can already trigger them anyway.

[upstream commit 1d0dcb3ad9d336e6d6ee020a750a7f8d907e28de]

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: George Spelvin <linux@horizon.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430494072-30283-3-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/futex.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1090,9 +1090,11 @@ static void __unqueue_futex(struct futex

 /*
  * The hash bucket lock must be held when this is called.
- * Afterwards, the futex_q must not be accessed.
+ * Afterwards, the futex_q must not be accessed. Callers
+ * must ensure to later call wake_up_q() for the actual
+ * wakeups to occur.
 */
-static void wake_futex(struct futex_q *q)
+static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 {
 	struct task_struct *p = q->task;

@@ -1100,14 +1102,10 @@ static void wake_futex(struct futex_q *q
 		return;

 	/*
-	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
-	 * a non-futex wake up happens on another CPU then the task
-	 * might exit and p would dereference a non-existing task
-	 * struct. Prevent this by holding a reference on p across the
-	 * wake up.
+	 * Queue the task for later wakeup for after we've released
+	 * the hb->lock. wake_q_add() grabs reference to p.
 	 */
-	get_task_struct(p);
-
+	wake_q_add(wake_q, p);
 	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as
@@ -1117,9 +1115,6 @@ static void wake_futex(struct futex_q *q
 	 */
 	smp_wmb();
 	q->lock_ptr = NULL;
-
-	wake_up_state(p, TASK_NORMAL);
-	put_task_struct(p);
 }

 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -1217,6 +1212,7 @@ futex_wake(u32 __user *uaddr, unsigned i
 	struct futex_q *this, *next;
 	union futex_key key = FUTEX_KEY_INIT;
 	int ret;
+	WAKE_Q(wake_q);

 	if (!bitset)
 		return -EINVAL;
@@ -1244,13 +1240,14 @@ futex_wake(u32 __user *uaddr, unsigned i
 			if (!(this->bitset & bitset))
 				continue;

-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			if (++ret >= nr_wake)
 				break;
 		}
 	}

 	spin_unlock(&hb->lock);
+	wake_up_q(&wake_q);
 out_put_key:
 	put_futex_key(&key);
 out:
@@ -1269,6 +1266,7 @@ futex_wake_op(u32 __user *uaddr1, unsign
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
 	int ret, op_ret;
+	WAKE_Q(wake_q);

 retry:
 	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
@@ -1320,7 +1318,7 @@ futex_wake_op(u32 __user *uaddr1, unsign
 				ret = -EINVAL;
 				goto out_unlock;
 			}
-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			if (++ret >= nr_wake)
 				break;
 		}
@@ -1334,7 +1332,7 @@ futex_wake_op(u32 __user *uaddr1, unsign
 				ret = -EINVAL;
 				goto out_unlock;
 			}
-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			if (++op_ret >= nr_wake2)
 				break;
 		}
@@ -1344,6 +1342,7 @@ futex_wake_op(u32 __user *uaddr1, unsign

 out_unlock:
 	double_unlock_hb(hb1, hb2);
+	wake_up_q(&wake_q);
 out_put_keys:
 	put_futex_key(&key2);
 out_put_key1:
@@ -1503,6 +1502,7 @@ static int futex_requeue(u32 __user *uad
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
+	WAKE_Q(wake_q);

 	if (requeue_pi) {
 		/*
@@ -1679,7 +1679,7 @@ static int futex_requeue(u32 __user *uad
 		 * woken by futex_unlock_pi().
 		 */
 		if (++task_count <= nr_wake && !requeue_pi) {
-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			continue;
 		}

@@ -1719,6 +1719,7 @@ static int futex_requeue(u32 __user *uad
 out_unlock:
 	free_pi_state(pi_state);
 	double_unlock_hb(hb1, hb2);
+	wake_up_q(&wake_q);
 	hb_waiters_dec(hb2);

 	/*
@@ -1,101 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:07 +0200
Subject: mm, uaccess: trigger might_sleep() in might_fault() with disabled pagefaults
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Commit 662bbcb2747c ("mm, sched: Allow uaccess in atomic with
pagefault_disable()") removed might_sleep() checks for all user access
code (that uses might_fault()).

The reason was to disable wrong "sleep in atomic" warnings in the
following scenario:
	pagefault_disable()
	rc = copy_to_user(...)
	pagefault_enable()

Which is valid, as pagefault_disable() increments the preempt counter
and therefore disables the pagefault handler. copy_to_user() will not
sleep and return an error code if a page is not available.

However, as all might_sleep() checks are removed,
CONFIG_DEBUG_ATOMIC_SLEEP would no longer detect the following scenario:
	spin_lock(&lock);
	rc = copy_to_user(...)
	spin_unlock(&lock)

If the kernel is compiled with preemption turned on, preempt_disable()
will make in_atomic() detect disabled preemption. The fault handler would
correctly never sleep on user access.
However, with preemption turned off, preempt_disable() is usually a NOP
(with !CONFIG_PREEMPT_COUNT), therefore in_atomic() will not be able to
detect disabled preemption nor disabled pagefaults. The fault handler
could sleep.
We really want to enable CONFIG_DEBUG_ATOMIC_SLEEP checks for user access
functions again, otherwise we can end up with horrible deadlocks.

Root of all evil is that pagefault_disable() acts almost as
preempt_disable(), depending on preemption being turned on/off.

As we now have pagefault_disabled(), we can use it to distinguish
whether user access functions might sleep.

Convert might_fault() into a macro that calls __might_fault(), to
allow proper file + line messages in case of a might_sleep() warning.

[upstream commit 9ec23531fd48031d1b6ca5366f5f967d17a8bc28]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 include/linux/kernel.h |  3 ++-
 mm/memory.c            | 18 ++++++------------
 2 files changed, 8 insertions(+), 13 deletions(-)

--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 v

 #if defined(CONFIG_MMU) && \
 	(defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
-void might_fault(void);
+#define might_fault() __might_fault(__FILE__, __LINE__)
+void __might_fault(const char *file, int line);
 #else
 static inline void might_fault(void) { }
 #endif
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3737,7 +3737,7 @@ void print_vma_addr(char *prefix, unsign
 }

 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
-void might_fault(void)
+void __might_fault(const char *file, int line)
 {
 	/*
 	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
@@ -3747,21 +3747,15 @@ void might_fault(void)
 	 */
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return;
-
-	/*
-	 * it would be nicer only to annotate paths which are not under
-	 * pagefault_disable, however that requires a larger audit and
-	 * providing helpers like get_user_atomic.
-	 */
-	if (in_atomic())
+	if (pagefault_disabled())
 		return;
-
-	__might_sleep(__FILE__, __LINE__, 0);
-
+	__might_sleep(file, line, 0);
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
 	if (current->mm)
 		might_lock_read(&current->mm->mmap_sem);
+#endif
 }
-EXPORT_SYMBOL(might_fault);
+EXPORT_SYMBOL(__might_fault);
 #endif

 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
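The macro conversion in this patch relies on __FILE__ and __LINE__
expanding at the point of use; a generic user-space sketch of the same
idiom (illustrative C, not kernel code) shows why the warning then points
at the caller rather than at the checker itself:

	#include <stdio.h>

	#define check() __check(__FILE__, __LINE__)	/* expands per call site */

	static void __check(const char *file, int line)
	{
		/* a real checker would test some condition before warning */
		fprintf(stderr, "warning at %s:%d\n", file, line);
	}

	int main(void)
	{
		check();	/* reports this file and line, not __check()'s */
		return 0;
	}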
@@ -1,642 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:08 +0200
Subject: [PATCH] sched/preempt, futex: Update comments to clarify that preemption doesn't have to be disabled
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

In general, non-atomic variants of user access functions must not sleep
if pagefaults are disabled.

Let's update all relevant comments in uaccess code. This also reflects
the might_sleep() checks in might_fault().

[upstream commit 2f09b227eeed4b3a072fe818c82a4c773b778cde]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 arch/avr32/include/asm/uaccess.h      | 12 ++++++---
 arch/hexagon/include/asm/uaccess.h    |  3 +-
 arch/m32r/include/asm/uaccess.h       | 30 +++++++++++++++-------
 arch/microblaze/include/asm/uaccess.h |  6 +++-
 arch/mips/include/asm/uaccess.h       | 45 ++++++++++++++++++++------------
 arch/s390/include/asm/uaccess.h       | 15 +++++++----
 arch/score/include/asm/uaccess.h      | 15 +++++++----
 arch/tile/include/asm/uaccess.h       | 18 +++++++++----
 arch/x86/include/asm/uaccess.h        | 15 +++++++----
 arch/x86/include/asm/uaccess_32.h     |  6 +++-
 arch/x86/lib/usercopy_32.c            |  6 +++-
 lib/strnlen_user.c                    |  6 +++-
 12 files changed, 118 insertions(+), 59 deletions(-)

--- a/arch/avr32/include/asm/uaccess.h
+++ b/arch/avr32/include/asm/uaccess.h
@@ -97,7 +97,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -116,7 +117,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -136,7 +138,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -158,7 +161,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
--- a/arch/hexagon/include/asm/uaccess.h
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -36,7 +36,8 @@
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
--- a/arch/m32r/include/asm/uaccess.h
+++ b/arch/m32r/include/asm/uaccess.h
@@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_reg
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_reg
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_reg
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -274,7 +278,8 @@ do { \
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -677,7 +686,8 @@ unsigned long clear_user(void __user *me
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -178,7 +178,8 @@ extern long __user_bad(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -290,7 +291,8 @@ extern long __user_bad(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -103,7 +103,8 @@ extern u64 __ua_limit;
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -138,7 +139,8 @@ extern u64 __ua_limit;
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -157,7 +159,8 @@ extern u64 __ua_limit;
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -177,7 +180,8 @@ extern u64 __ua_limit;
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -199,7 +203,8 @@ extern u64 __ua_limit;
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -498,7 +503,8 @@ extern void __put_user_unknown(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -517,7 +523,8 @@ extern void __put_user_unknown(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -537,7 +544,8 @@ extern void __put_user_unknown(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -559,7 +567,8 @@ extern void __put_user_unknown(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -815,7 +824,8 @@ extern size_t __copy_user(void *__to, co
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -888,7 +898,8 @@ extern size_t __copy_user_inatomic(void
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -1075,7 +1086,8 @@ extern size_t __copy_in_user_eva(void *_
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -1107,7 +1119,8 @@ extern size_t __copy_in_user_eva(void *_
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -1329,7 +1342,8 @@ strncpy_from_user(char *__to, const char
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
@@ -1398,7 +1412,8 @@ static inline long __strnlen_user(const
  * strnlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -98,7 +98,8 @@ static inline unsigned long extable_fixu
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_u
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -290,7 +293,8 @@ void copy_from_user_overflow(void)
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -348,7 +352,8 @@ static inline unsigned long strnlen_user
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
--- a/arch/score/include/asm/uaccess.h
+++ b/arch/score/include/asm/uaccess.h
@@ -36,7 +36,8 @@
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -61,7 +62,8 @@
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -79,7 +81,8 @@
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -98,7 +101,8 @@
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -119,7 +123,8 @@
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsig
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -192,7 +193,8 @@ extern int __get_user_bad(void)
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -274,7 +276,8 @@ extern int __put_user_bad(void)
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -330,7 +333,8 @@ extern int __put_user_bad(void)
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -437,7 +442,8 @@ static inline unsigned long __must_check
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to user space. Caller must check
 * the specified blocks with access_ok() before calling this function.
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(un
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -145,7 +146,8 @@ extern int __get_user_bad(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -240,7 +242,8 @@ extern void __put_user_8(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -455,7 +458,8 @@ struct __large_struct { unsigned long bu
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -479,7 +483,8 @@ struct __large_struct { unsigned long bu
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -70,7 +70,8 @@ static __always_inline unsigned long __m
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -117,7 +118,8 @@ static __always_inline unsigned long
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocach
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user);
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -85,7 +85,8 @@ static inline long do_strnlen_user(const
  * @str: The string to measure.
  * @count: Maximum count (including NUL character)
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
@@ -121,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user);
  * strlen_user: - Get the size of a user string INCLUDING final NUL.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
 *
@@ -1,184 +0,0 @@
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 4 May 2015 07:02:46 -0700
Subject: ipc/mqueue: Implement lockless pipelined wakeups
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This patch moves the wakeup_process() invocation so it is not done under
the info->lock by making use of a lockless wake_q. With this change, the
waiter is woken up once it is STATE_READY and it does not need to loop
on SMP if it is still in STATE_PENDING. In the timeout case we still need
to grab the info->lock to verify the state.

This change should also avoid the introduction of preempt_disable() in -rt
which avoids a busy-loop which polls for the STATE_PENDING -> STATE_READY
change if the waiter has a higher priority compared to the waker.

Additionally, this patch micro-optimizes wq_sleep by using the cheaper
cousin of set_current_state(TASK_INTERRUPTIBLE) as we will block no
matter what, thus get rid of the implied barrier.

[upstream commit fa6004ad4528153b699a4d5ce5ea6b33acce74cc]

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: George Spelvin <linux@horizon.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: dave@stgolabs.net
Link: http://lkml.kernel.org/r/1430748166.1940.17.camel@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
ipc/mqueue.c | 54 +++++++++++++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 21 deletions(-)

--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -47,8 +47,7 @@
#define RECV 1

#define STATE_NONE 0
-#define STATE_PENDING 1
-#define STATE_READY 2
+#define STATE_READY 1

struct posix_msg_tree_node {
struct rb_node rb_node;
@@ -571,15 +570,12 @@ static int wq_sleep(struct mqueue_inode_
wq_add(info, sr, ewp);

for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_INTERRUPTIBLE);

spin_unlock(&info->lock);
time = schedule_hrtimeout_range_clock(timeout, 0,
HRTIMER_MODE_ABS, CLOCK_REALTIME);

- while (ewp->state == STATE_PENDING)
- cpu_relax();
-
if (ewp->state == STATE_READY) {
retval = 0;
goto out;
@@ -907,11 +903,15 @@ SYSCALL_DEFINE1(mq_unlink, const char __
* list of waiting receivers. A sender checks that list before adding the new
* message into the message array. If there is a waiting receiver, then it
* bypasses the message array and directly hands the message over to the
- * receiver.
- * The receiver accepts the message and returns without grabbing the queue
- * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
- * are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c for more details.
+ * receiver. The receiver accepts the message and returns without grabbing the
+ * queue spinlock:
+ *
+ * - Set pointer to message.
+ * - Queue the receiver task for later wakeup (without the info->lock).
+ * - Update its state to STATE_READY. Now the receiver can continue.
+ * - Wake up the process after the lock is dropped. Should the process wake up
+ * before this wakeup (due to a timeout or a signal) it will either see
+ * STATE_READY and continue or acquire the lock to check the state again.
*
* The same algorithm is used for senders.
*/
@@ -919,21 +919,29 @@ SYSCALL_DEFINE1(mq_unlink, const char __
/* pipelined_send() - send a message directly to the task waiting in
* sys_mq_timedreceive() (without inserting message into a queue).
*/
-static inline void pipelined_send(struct mqueue_inode_info *info,
+static inline void pipelined_send(struct wake_q_head *wake_q,
+ struct mqueue_inode_info *info,
struct msg_msg *message,
struct ext_wait_queue *receiver)
{
receiver->msg = message;
list_del(&receiver->list);
- receiver->state = STATE_PENDING;
- wake_up_process(receiver->task);
- smp_wmb();
+ wake_q_add(wake_q, receiver->task);
+ /*
+ * Rely on the implicit cmpxchg barrier from wake_q_add such
+ * that we can ensure that updating receiver->state is the last
+ * write operation: As once set, the receiver can continue,
+ * and if we don't have the reference count from the wake_q,
+ * yet, at that point we can later have a use-after-free
+ * condition and bogus wakeup.
+ */
receiver->state = STATE_READY;
}

/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
* gets its message and put to the queue (we have one free place for sure). */
-static inline void pipelined_receive(struct mqueue_inode_info *info)
+static inline void pipelined_receive(struct wake_q_head *wake_q,
+ struct mqueue_inode_info *info)
{
struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);

@@ -944,10 +952,9 @@ static inline void pipelined_receive(str
}
if (msg_insert(sender->msg, info))
return;
+
list_del(&sender->list);
- sender->state = STATE_PENDING;
- wake_up_process(sender->task);
- smp_wmb();
+ wake_q_add(wake_q, sender->task);
sender->state = STATE_READY;
}

@@ -965,6 +972,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqd
struct timespec ts;
struct posix_msg_tree_node *new_leaf = NULL;
int ret = 0;
+ WAKE_Q(wake_q);

if (u_abs_timeout) {
int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1049,7 +1057,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqd
} else {
receiver = wq_get_first_waiter(info, RECV);
if (receiver) {
- pipelined_send(info, msg_ptr, receiver);
+ pipelined_send(&wake_q, info, msg_ptr, receiver);
} else {
/* adds message to the queue */
ret = msg_insert(msg_ptr, info);
@@ -1062,6 +1070,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqd
}
out_unlock:
spin_unlock(&info->lock);
+ wake_up_q(&wake_q);
out_free:
if (ret)
free_msg(msg_ptr);
@@ -1149,14 +1158,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t,
msg_ptr = wait.msg;
}
} else {
+ WAKE_Q(wake_q);
+
msg_ptr = msg_get(info);

inode->i_atime = inode->i_mtime = inode->i_ctime =
CURRENT_TIME;

/* There is now free space in queue. */
- pipelined_receive(info);
+ pipelined_receive(&wake_q, info);
spin_unlock(&info->lock);
+ wake_up_q(&wake_q);
ret = 0;
}
if (ret == 0) {
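Editor's illustration, not part of the patch above: a minimal userspace analogue of the wake_q pattern, using pthreads. Waiters are collected under the lock, but the actual wakeup is issued only after the lock has been dropped, so woken threads never contend on a lock the waker still holds. All names here are invented for the sketch.

#include <pthread.h>

#define MAX_WAKE 16

struct wake_queue {
	pthread_cond_t *pending[MAX_WAKE];
	int n;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with 'lock' held: only remember who to wake. */
static void wake_q_add_user(struct wake_queue *wq, pthread_cond_t *cond)
{
	if (wq->n < MAX_WAKE)
		wq->pending[wq->n++] = cond;
}

/* Called after 'lock' has been released: issue the deferred wakeups. */
static void wake_up_q_user(struct wake_queue *wq)
{
	for (int i = 0; i < wq->n; i++)
		pthread_cond_signal(wq->pending[i]);
	wq->n = 0;
}

void deliver(struct wake_queue *wq, pthread_cond_t *waiter)
{
	pthread_mutex_lock(&lock);
	/* ... hand over the message, mark state READY ... */
	wake_q_add_user(wq, waiter);
	pthread_mutex_unlock(&lock);
	wake_up_q_user(wq);	/* wakeup happens outside the lock */
}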
@@ -1,368 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:09 +0200
Subject: sched/preempt, mm/kmap: Explicitly disable/enable preemption in kmap_atomic_*
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The existing code relies on pagefault_disable() implicitly disabling
preemption, so that no schedule will happen between kmap_atomic() and
kunmap_atomic().

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

[upstream commit 2cb7c9cb426660b5ed58b643d9e7dd5d50ba901f]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arm/mm/highmem.c | 3 +++
arch/frv/mm/highmem.c | 2 ++
arch/metag/mm/highmem.c | 4 +++-
arch/microblaze/mm/highmem.c | 4 +++-
arch/mips/mm/highmem.c | 5 ++++-
arch/mn10300/include/asm/highmem.h | 3 +++
arch/parisc/include/asm/cacheflush.h | 2 ++
arch/powerpc/mm/highmem.c | 4 +++-
arch/sparc/mm/highmem.c | 4 +++-
arch/tile/mm/highmem.c | 3 ++-
arch/x86/mm/highmem_32.c | 3 ++-
arch/x86/mm/iomap_32.c | 2 ++
arch/xtensa/mm/highmem.c | 2 ++
include/linux/highmem.h | 2 ++
include/linux/io-mapping.h | 2 ++
15 files changed, 38 insertions(+), 7 deletions(-)

--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page)
void *kmap;
int type;

+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr)
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

@@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
int idx, type;
struct page *page = pfn_to_page(pfn);

+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
--- a/arch/frv/mm/highmem.c
+++ b/arch/frv/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
unsigned long paddr;
int type;

+ preempt_disable();
pagefault_disable();
type = kmap_atomic_idx_push();
paddr = page_to_phys(page);
@@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr)
}
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/metag/mm/highmem.c
+++ b/arch/metag/mm/highmem.c
@@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
int type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

@@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
unsigned long vaddr;
int type;

+ preempt_disable();
pagefault_disable();

type = kmap_atomic_idx_push();
--- a/arch/microblaze/mm/highmem.c
+++ b/arch/microblaze/mm/highmem.c
@@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
+ preempt_enable();
return;
}

@@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr)
#endif
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < FIXADDR_START) { // FIXME
pagefault_enable();
+ preempt_enable();
return;
}

@@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr)
#endif
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

@@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
unsigned long vaddr;
int idx, type;

+ preempt_disable();
pagefault_disable();

type = kmap_atomic_idx_push();
--- a/arch/mn10300/include/asm/highmem.h
+++ b/arch/mn10300/include/asm/highmem.h
@@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct p
unsigned long vaddr;
int idx, type;

+ preempt_disable();
pagefault_disable();
if (page < highmem_start_page)
return page_address(page);
@@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsig

if (vaddr < FIXADDR_START) { /* FIXME */
pagefault_enable();
+ preempt_enable();
return;
}

@@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsig

kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
#endif /* __KERNEL__ */

--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -142,6 +142,7 @@ static inline void kunmap(struct page *p

static inline void *kmap_atomic(struct page *page)
{
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void
{
flush_kernel_dcache_page_addr(addr);
pagefault_enable();
+ preempt_enable();
}

#define kmap_atomic_prot(page, prot) kmap_atomic(page)
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
+ preempt_enable();
return;
}

@@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr)

kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
long idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < FIXADDR_START) { // FIXME
pagefault_enable();
+ preempt_enable();
return;
}

@@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr)

kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page
int idx, type;
pte_t *pte;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();

/* Avoid icache flushes by disallowing atomic executable mappings. */
@@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();

if (!PageHighMem(page))
@@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr)
#endif

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long
unsigned long vaddr;
int idx, type;

+ preempt_disable();
pagefault_disable();

type = kmap_atomic_idx_push();
@@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL_GPL(iounmap_atomic);
--- a/arch/xtensa/mm/highmem.c
+++ b/arch/xtensa/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
enum fixed_addresses idx;
unsigned long vaddr;

+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -65,6 +65,7 @@ static inline void kunmap(struct page *p

static inline void *kmap_atomic(struct page *page)
{
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct p

static inline void __kunmap_atomic(void *addr)
{
pagefault_enable();
+ preempt_enable();
}

#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -141,6 +141,7 @@ static inline void __iomem *
io_mapping_map_atomic_wc(struct io_mapping *mapping,
unsigned long offset)
{
+ preempt_disable();
pagefault_disable();
return ((char __force __iomem *) mapping) + offset;
}
@@ -149,6 +150,7 @@ static inline void
io_mapping_unmap_atomic(void __iomem *vaddr)
{
pagefault_enable();
+ preempt_enable();
}

/* Non-atomic map/unmap */
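Editor's sketch, not from the patch: the ordering discipline the patch follows, modeled with two plain counters. Guards are taken outermost-first (preempt, then pagefault) and dropped innermost-first, so each region stays properly nested. All names are invented for illustration.

#include <assert.h>

static int preempt_count_sim;
static int pagefault_count_sim;

static void enter_atomic_mapping(void)
{
	preempt_count_sim++;	/* preempt_disable() analogue */
	pagefault_count_sim++;	/* pagefault_disable() analogue */
}

static void leave_atomic_mapping(void)
{
	pagefault_count_sim--;	/* pagefault_enable() analogue */
	preempt_count_sim--;	/* preempt_enable() analogue */
	assert(pagefault_count_sim >= 0 && preempt_count_sim >= 0);
}

int main(void)
{
	enter_atomic_mapping();
	/* ... the "atomic" mapping is used here ... */
	leave_atomic_mapping();
	assert(preempt_count_sim == 0 && pagefault_count_sim == 0);
	return 0;
}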
@@ -1,43 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 Mar 2013 11:17:42 +0100
Subject: futex: Ensure lock/unlock symmetry versus pi_lock and hash bucket lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

In exit_pi_state_list() we have the following locking construct:

spin_lock(&hb->lock);
raw_spin_lock_irq(&curr->pi_lock);

...
spin_unlock(&hb->lock);

In !RT this works, but on RT the migrate_enable() function which is
called from spin_unlock() sees atomic context due to the held pi_lock
and just decrements the migrate_disable_atomic counter of the
task. Now the next call to migrate_disable() sees the counter being
negative and issues a warning. That check should be in
migrate_enable() already.

Fix this by dropping pi_lock before unlocking hb->lock and reacquire
pi_lock after that again. This is safe as the loop code reevaluates
head again under the pi_lock.

Reported-by: Yong Zhang <yong.zhang@windriver.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/futex.c | 2 ++
1 file changed, 2 insertions(+)

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -738,7 +738,9 @@ void exit_pi_state_list(struct task_stru
* task still owns the PI-state:
*/
if (head->next != next) {
+ raw_spin_unlock_irq(&curr->pi_lock);
spin_unlock(&hb->lock);
+ raw_spin_lock_irq(&curr->pi_lock);
continue;
}

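Editor's sketch of the fix's shape, in userspace terms: with two nested locks, release in strict reverse order of acquisition before looping, never releasing the outer lock while the inner one acquired after it is still held. A simplified pthread analogue; the predicate and names are invented.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t hb_lock = PTHREAD_MUTEX_INITIALIZER;	/* outer */
static pthread_mutex_t pi_lock = PTHREAD_MUTEX_INITIALIZER;	/* inner */

static bool need_retry(void) { return false; }	/* placeholder predicate */

void retry_loop(void)
{
	for (;;) {
		pthread_mutex_lock(&hb_lock);
		pthread_mutex_lock(&pi_lock);
		if (need_retry()) {
			/* drop in reverse order, as the patch does with
			 * pi_lock and hb->lock, then start over */
			pthread_mutex_unlock(&pi_lock);
			pthread_mutex_unlock(&hb_lock);
			continue;
		}
		/* ... work under both locks ... */
		pthread_mutex_unlock(&pi_lock);
		pthread_mutex_unlock(&hb_lock);
		break;
	}
}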
@@ -1,41 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:10 +0200
Subject: sched/preempt, mm/kmap, MIPS: Disable preemption in kmap_coherent() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

k(un)map_coherent relies on pagefault_disable() to also disable
preemption.

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

This patch is based on a patch by Yang Shi on the -rt tree:
"k{un}map_coherent are just called when cpu_has_dc_aliases == 1 with VIPT
cache. However, actually, the most modern MIPS processors have PIPT dcache
without dcache alias issue. In such case, k{un}map_atomic will be called
with preempt enabled."

[upstream commit ce01948eb85da733558fa77c2a554144a57ab0fb]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/mips/mm/init.c | 2 ++
1 file changed, 2 insertions(+)

--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *

BUG_ON(Page_dcache_dirty(page));

+ preempt_disable();
pagefault_disable();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
idx += in_interrupt() ? FIX_N_COLOURS : 0;
@@ -152,6 +153,7 @@ void kunmap_coherent(void)
write_c0_entryhi(old_ctx);
local_irq_restore(flags);
pagefault_enable();
+ preempt_enable();
}

void copy_user_highpage(struct page *to, struct page *from,
@@ -1,647 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:11 +0200
Subject: mm/fault, arch: Use pagefault_disable() to check for disabled pagefaults in the handler
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Introduce faulthandler_disabled() and use it to check for irq context and
disabled pagefaults (via pagefault_disable()) in the pagefault handlers.

Please note that we keep the in_atomic() checks in place - to detect
whether in irq context (in which case preemption is always properly
disabled).

In contrast, preempt_disable() should never be used to disable pagefaults.
With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt
counter, and therefore the result of in_atomic() differs.
We validate that condition by using might_fault() checks when calling
might_sleep().

Therefore, add a comment to faulthandler_disabled(), describing why this
is needed.

faulthandler_disabled() and pagefault_disable() are defined in
linux/uaccess.h, so let's properly add that include to all relevant files.

This patch is based on a patch from Thomas Gleixner.

[upstream commit 70ffdb9393a7264a069265edded729078dcf0425]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/alpha/mm/fault.c | 5 ++---
arch/arc/mm/fault.c | 2 +-
arch/arm/mm/fault.c | 2 +-
arch/arm64/mm/fault.c | 2 +-
arch/avr32/mm/fault.c | 4 ++--
arch/cris/mm/fault.c | 6 +++---
arch/frv/mm/fault.c | 4 ++--
arch/ia64/mm/fault.c | 4 ++--
arch/m32r/mm/fault.c | 8 ++++----
arch/m68k/mm/fault.c | 4 ++--
arch/metag/mm/fault.c | 2 +-
arch/microblaze/mm/fault.c | 8 ++++----
arch/mips/mm/fault.c | 4 ++--
arch/mn10300/mm/fault.c | 4 ++--
arch/nios2/mm/fault.c | 2 +-
arch/parisc/kernel/traps.c | 4 ++--
arch/parisc/mm/fault.c | 4 ++--
arch/powerpc/mm/fault.c | 9 +++++----
arch/s390/mm/fault.c | 2 +-
arch/score/mm/fault.c | 3 ++-
arch/sh/mm/fault.c | 5 +++--
arch/sparc/mm/fault_32.c | 4 ++--
arch/sparc/mm/fault_64.c | 4 ++--
arch/sparc/mm/init_64.c | 2 +-
arch/tile/mm/fault.c | 4 ++--
arch/um/kernel/trap.c | 4 ++--
arch/unicore32/mm/fault.c | 2 +-
arch/x86/mm/fault.c | 5 +++--
arch/xtensa/mm/fault.c | 4 ++--
include/linux/uaccess.h | 12 ++++++++++++
30 files changed, 72 insertions(+), 57 deletions(-)

--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -23,8 +23,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>

extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);

@@ -107,7 +106,7 @@ do_page_fault(unsigned long address, uns

/* If we're in an interrupt context, or have no user context,
we must not take the fault. */
- if (!mm || in_atomic())
+ if (!mm || faulthandler_disabled())
goto no_context;

#ifdef CONFIG_ALPHA_LARGE_VMALLOC
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -86,7 +86,7 @@ void do_page_fault(unsigned long address
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsign
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsig
* If we're in an interrupt or have no user context, we must not take
* the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -14,11 +14,11 @@
#include <linux/pagemap.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
+#include <linux/uaccess.h>

#include <asm/mmu_context.h>
#include <asm/sysreg.h>
#include <asm/tlb.h>
-#include <asm/uaccess.h>

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, int trap)
@@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned l
* If we're in an interrupt or have no user context, we must
* not take the fault...
*/
- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+ if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM))
goto no_context;

local_irq_enable();
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <arch/system.h>

extern int find_fixup_code(struct pt_regs *);
@@ -109,11 +109,11 @@ do_page_fault(unsigned long address, str
info.si_code = SEGV_MAPERR;

/*
- * If we're in an interrupt or "atomic" operation or have no
+ * If we're in an interrupt, have pagefaults disabled or have no
* user context, we must not take the fault.
*/

- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -19,9 +19,9 @@
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>

#include <asm/pgtable.h>
-#include <asm/uaccess.h>
#include <asm/gdb-stub.h>

/*****************************************************************************/
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datamm
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(__frame))
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -11,10 +11,10 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prefetch.h>
+#include <linux/uaccess.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
-#include <asm/uaccess.h>

extern int die(char *, struct pt_regs *, long);

@@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long addres
/*
* If we're in an interrupt or have no user context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

#ifdef CONFIG_VIRTUAL_MEM_MAP
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -24,9 +24,9 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/uaccess.h>

#include <asm/m32r.h>
-#include <asm/uaccess.h>
#include <asm/hardirq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_
mm = tsk->mm;

/*
- * If we're in an interrupt or have no user context or are running in an
- * atomic region then we must not take the fault..
+ * If we're in an interrupt or have no user context or have pagefaults
+ * disabled then we must not take the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;

if (error_code & ACE_USERMODE)
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -10,10 +10,10 @@
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/uaccess.h>

#include <asm/setup.h>
#include <asm/traps.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>

extern void die_if_kernel(char *, struct pt_regs *, long);
@@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs,

mm = tsk->mm;

- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs,
if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
is_write = 0;

- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
if (kernel_mode(regs))
goto bad_area_nosemaphore;

- /* in_atomic() in user mode is really bad,
+ /* faulthandler_disabled() in user mode is really bad,
as is current->mm == NULL. */
- pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n",
- mm);
+ pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
+ mm);
pr_emerg("r15 = %lx MSR = %lx\n",
regs->r15, regs->msr);
die("Weird page fault", regs, SIGSEGV);
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -21,10 +21,10 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
+#include <linux/uaccess.h>

#include <asm/branch.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/highmem.h> /* For VMALLOC_END */
#include <linux/kdebug.h>
@@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(st
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -23,8 +23,8 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/uaccess.h>

-#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/hardirq.h>
#include <asm/cpu-regs.h>
@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -26,9 +26,9 @@
#include <linux/console.h>
#include <linux/bug.h>
#include <linux/ratelimit.h>
+#include <linux/uaccess.h>

#include <asm/assembly.h>
-#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/traps.h>
@@ -800,7 +800,7 @@ void notrace handle_interruption(int cod
* unless pagefault_disable() was called before.
*/

- if (fault_space == 0 && !in_atomic())
+ if (fault_space == 0 && !faulthandler_disabled())
{
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
parisc_terminate("Kernel Fault", regs, code, fault_address);
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -15,8 +15,8 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/uaccess.h>

-#include <asm/uaccess.h>
#include <asm/traps.h>

/* Various important other fields */
@@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs,
int fault;
unsigned int flags;

- if (in_atomic())
+ if (pagefault_disabled())
goto no_context;

tsk = current;
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -33,13 +33,13 @@
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
+#include <linux/uaccess.h>

#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
@@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_re
if (!arch_irq_disabled_regs(regs))
local_irq_enable();

- if (in_atomic() || mm == NULL) {
+ if (faulthandler_disabled() || mm == NULL) {
if (!user_mode(regs)) {
rc = SIGSEGV;
goto bail;
}
- /* in_atomic() in user mode is really bad,
+ /* faulthandler_disabled() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
- "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+ "faulthandler_disabled() = %d mm = %p\n",
+ faulthandler_disabled(), mm);
printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
regs->nip, regs->msr);
die("Weird page fault", regs, SIGSEGV);
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -399,7 +399,7 @@ static inline int do_exception(struct pt
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
goto out;

address = trans_exc_code & __FAIL_ADDR_MASK;
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/uaccess.h>

/*
* This routine handles page faults. It determines the address,
@@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (pagefault_disabled() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -17,6 +17,7 @@
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(

/*
* If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
+ * with pagefaults disabled then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -21,6 +21,7 @@
#include <linux/perf_event.h>
#include <linux/interrupt.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>

#include <asm/page.h>
#include <asm/pgtable.h>
@@ -29,7 +30,6 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/traps.h>
-#include <asm/uaccess.h>

#include "mm_32.h"

@@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (pagefault_disabled() || !mm)
goto no_context;

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -22,12 +22,12 @@
#include <linux/kdebug.h>
#include <linux/percpu.h>
#include <linux/context_tracking.h>
+#include <linux/uaccess.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/openprom.h>
#include <asm/oplib.h>
-#include <asm/uaccess.h>
#include <asm/asi.h>
#include <asm/lsu.h>
#include <asm/sections.h>
@@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fau
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto intr_or_no_mm;

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2738,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs)
struct mm_struct *mm = current->mm;
struct tsb_config *tp;

- if (in_atomic() || !mm) {
+ if (faulthandler_disabled() || !mm) {
const struct exception_table_entry *entry;

entry = search_exception_tables(regs->tpc);
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_r

/*
* If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault.
+ * region with pagefaults disabled then we must not take the fault.
*/
- if (in_atomic() || !mm) {
+ if (pagefault_disabled() || !mm) {
vma = NULL; /* happy compiler */
goto bad_area_nosemaphore;
}
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -35,10 +35,10 @@ int handle_page_fault(unsigned long addr
*code_out = SEGV_MAPERR;

/*
- * If the fault was during atomic operation, don't take the fault, just
+ * If the fault was with pagefaults disabled, don't take the fault, just
* fail.
*/
- if (in_atomic())
+ if (faulthandler_disabled())
goto out_nosemaphore;

if (is_user)
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, uns
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,6 +13,7 @@
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
+#include <linux/uaccess.h> /* faulthandler_disabled() */

#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -1126,9 +1127,9 @@ static noinline void

/*
* If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
+ * in a region with pagefaults disabled then we must not take the fault
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,10 +15,10 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/hardirq.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>

DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;

@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
/* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm) {
+ if (faulthandler_disabled() || !mm) {
bad_page_fault(regs, address, SIGSEGV);
return;
}
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -59,6 +59,18 @@ static inline void pagefault_enable(void
*/
#define pagefault_disabled() (current->pagefault_disabled != 0)

+/*
+ * The pagefault handler is in general disabled by pagefault_disable() or
+ * when in irq context (via in_atomic()).
+ *
+ * This function should only be used by the fault handlers. Other users should
+ * stick to pagefault_disabled().
+ * Please NEVER use preempt_disable() to disable the fault handler. With
+ * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
+ * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
+ */
+#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
+
#ifndef ARCH_HAS_NOCACHE_UACCESS

static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
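Editor's illustration of the check the patch introduces: the fault path is considered off-limits when either an explicit per-thread disable counter is non-zero or the context is atomic for another reason. Modeled in plain C with a thread-local counter; all names are invented stand-ins for the kernel helpers.

#include <stdbool.h>

static _Thread_local int pagefault_disabled_count;
static _Thread_local bool in_irq_sim;	/* stands in for in_atomic() */

static void pf_disable(void) { pagefault_disabled_count++; }
static void pf_enable(void)  { pagefault_disabled_count--; }

static bool pagefault_disabled_sim(void)
{
	return pagefault_disabled_count != 0;
}

/* Mirrors: #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) */
static bool faulthandler_disabled_sim(void)
{
	return pagefault_disabled_sim() || in_irq_sim;
}

int handle_fault_sim(void)
{
	if (faulthandler_disabled_sim())
		return -1;	/* like "goto no_context": use the fixup, never sleep */
	/* ... normal fault handling, may sleep ... */
	return 0;
}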
@@ -1,33 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:12 +0200
Subject: mm/fault, drm/i915: Use pagefault_disabled() to check for disabled pagefaults
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Now that the pagefault disabled counter is in place, we can replace
the in_atomic() check by a pagefault_disabled() check.

[upstream commit 32d8206725bcf6e3ce7832ac39e61a6ecfd558db]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/dma_remapping.h>
+#include <linux/uaccess.h>

#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -465,7 +466,7 @@ i915_gem_execbuffer_relocate_entry(struc
}

/* We can't wait for rendering with pagefaults disabled */
- if (obj->active && in_atomic())
+ if (obj->active && pagefault_disabled())
return -EFAULT;

if (use_cpu_reloc(obj))
@@ -1,46 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:13 +0200
Subject: sched/preempt, futex: Disable preemption in UP futex_atomic_op_inuser() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Let's explicitly disable/enable preemption in the !CONFIG_SMP version
of futex_atomic_op_inuser, to prepare for pagefault_disable() not
touching preemption anymore.

Otherwise we might break mutual exclusion when relying on a get_user()/
put_user() implementation.

[upstream commit f3dae07e442a8131a5485b6a38db2aa22a7a48cf]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
include/asm-generic/futex.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -8,8 +8,7 @@
#ifndef CONFIG_SMP
/*
* The following implementation only for uniprocessor machines.
- * For UP, it's relies on the fact that pagefault_disable() also disables
- * preemption to ensure mutual exclusion.
+ * It relies on preempt_disable() ensuring mutual exclusion.
*
*/

@@ -38,6 +37,7 @@ futex_atomic_op_inuser(int encoded_op, u
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;

+ preempt_disable();
pagefault_disable();

ret = -EFAULT;
@@ -72,6 +72,7 @@ futex_atomic_op_inuser(int encoded_op, u

out_pagefault_enable:
pagefault_enable();
+ preempt_enable();

if (ret == 0) {
switch (cmp) {
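Editor's sketch of why the UP path needs preemption disabled: the read-modify-write on the user word is not atomic, so it must be protected against interleaving. In userspace terms, a plain mutex plays the role of preempt_disable(); all names here are invented.

#include <pthread.h>

static pthread_mutex_t up_exclusion = PTHREAD_MUTEX_INITIALIZER;

int futex_op_add_sim(int *uaddr, int oparg, int *oldval)
{
	pthread_mutex_lock(&up_exclusion);	/* preempt_disable() analogue */
	*oldval = *uaddr;			/* get_user() step */
	*uaddr = *oldval + oparg;		/* put_user() of old + arg */
	pthread_mutex_unlock(&up_exclusion);	/* preempt_enable() analogue */
	return 0;
}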
@@ -1,37 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:14 +0200
Subject: sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Let's explicitly disable/enable preemption in the !CONFIG_SMP version
of futex_atomic_cmpxchg_inatomic, to prepare for pagefault_disable() not
touching preemption anymore. This is needed for this function to be
callable from both atomic and non-atomic context.

Otherwise we might break mutual exclusion when relying on a get_user()/
put_user() implementation.

[upstream commit f3dae07e442a8131a5485b6a38db2aa22a7a48cf]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
include/asm-generic/futex.h | 2 ++
1 file changed, 2 insertions(+)

--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -107,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
{
u32 val;

+ preempt_disable();
if (unlikely(get_user(val, uaddr) != 0))
return -EFAULT;

@@ -114,6 +115,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
return -EFAULT;

*uval = val;
+ preempt_enable();

return 0;
}
@@ -1,37 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:15 +0200
Subject: sched/preempt, arm/futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The !CONFIG_SMP implementation of futex_atomic_cmpxchg_inatomic()
requires preemption to be disabled to guarantee mutual exclusion.
Let's make this explicit.

This patch is based on a patch by Sebastian Andrzej Siewior on the
-rt branch.

[upstream commit 39919b01ae4c1949736b40b79e27178d0c0bc406]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arm/include/asm/futex.h | 3 +++
1 file changed, 3 insertions(+)

--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

+ preempt_disable();
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: " TUSER(ldr) " %1, [%4]\n"
" teq %1, %2\n"
@@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
: "cc", "memory");

*uval = val;
+ preempt_enable();
+
return ret;
}

@@ -1,48 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:16 +0200
Subject: sched/preempt, arm/futex: Disable preemption in UP futex_atomic_op_inuser() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The !CONFIG_SMP implementation of futex_atomic_op_inuser() seems to rely
on disabled preemption to guarantee mutual exclusion.

From commit e589ed23dd27:
"For UP it's enough to disable preemption to ensure mutual exclusion..."
From the code itself:
"!SMP, we can work around lack of atomic ops by disabling preemption"

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

[upstream commit 388b0e0adbc98a1b12a077dc92851a3ce016db42]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arm/include/asm/futex.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -127,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+#ifndef CONFIG_SMP
+ preempt_disable();
+#endif
+ pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
@@ -149,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op,
ret = -ENOSYS;
}

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
+#ifndef CONFIG_SMP
+ preempt_enable();
+#endif

if (!ret) {
switch (cmp) {
@@ -1,86 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:17 +0200
Subject: sched/preempt, futex: Update comments to clarify that preemption doesn't have to be disabled
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As arm64 and arc have no special implementations for !CONFIG_SMP, mutual
exclusion doesn't seem to rely on preemption.

Let's make it clear in the comments that preemption doesn't have to be
disabled when accessing user space in the futex code, so we can remove
preempt_disable() from pagefault_disable().

[upstream commit 2f09b227eeed4b3a072fe818c82a4c773b778cde]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arc/include/asm/futex.h | 10 +++++-----
arch/arm64/include/asm/futex.h | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)

--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
@@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser
ret = -ENOSYS;
}

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();

if (!ret) {
switch (cmp) {
@@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser
return ret;
}

-/* Compare-xchg with preemption disabled.
+/* Compare-xchg with pagefaults disabled.
* Notes:
* -Best-Effort: Exchg happens only if compare succeeds.
* If compare fails, returns; leaving retry/looping to upper layers
@@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();

/* TBD : can use llock/scond */
__asm__ __volatile__(
@@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
: "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
: "cc", "memory");

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();

*uval = val;
return val;
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
@@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op,
ret = -ENOSYS;
}

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();

if (!ret) {
switch (cmp) {
@@ -1,34 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:19 +0200
Subject: sched/preempt, MIPS: Properly lock access to the FPU
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Let's always disable preemption and pagefaults when locking the fpu,
so we can be sure that the owner won't change in between.

This is a preparation for pagefault_disable() not touching preemption
anymore.

[upstream commit 76deabd1867d6d2895152f31fdec819e3505738b]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/mips/kernel/signal-common.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)

--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct
extern int fpcsr_pending(unsigned int __user *fpcsr);

/* Make sure we will not lose FPU ownership */
-#ifdef CONFIG_PREEMPT
-#define lock_fpu_owner() preempt_disable()
-#define unlock_fpu_owner() preempt_enable()
-#else
-#define lock_fpu_owner() pagefault_disable()
-#define unlock_fpu_owner() pagefault_enable()
-#endif
+#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
+#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })

#endif /* __SIGNAL_COMMON_H */
@@ -1,61 +0,0 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:20 +0200
Subject: sched/preempt, mm/fault: Decouple preemption from the page fault logic
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As the fault handlers now all rely on the pagefault_disabled() checks
and implicit preempt_disable() calls by pagefault_disable() have been
made explicit, we can completely rely on the pagefault_disabled counter.

So let's no longer touch the preempt count when disabling/enabling
pagefaults. After a call to pagefault_disable(), pagefault_disabled()
will return true, but in_atomic() won't.

[upstream commit 8222dbe21e79338de92d5e1956cd1e3994cc9f93]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
include/linux/uaccess.h | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)

--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -1,7 +1,6 @@
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__

-#include <linux/preempt.h>
#include <linux/sched.h>
#include <asm/uaccess.h>

@@ -20,17 +19,11 @@ static __always_inline void pagefault_di
* These routines enable/disable the pagefault handler. If disabled, it will
* not take any locks and go straight to the fixup table.
*
- * We increase the preempt and the pagefault count, to be able to distinguish
- * whether we run in simple atomic context or in a real pagefault_disable()
- * context.
- *
- * For now, after pagefault_disabled() has been called, we run in atomic
- * context. User access methods will not sleep.
- *
+ * User access methods will not sleep when called from a pagefault_disabled()
+ * environment.
*/
static inline void pagefault_disable(void)
{
- preempt_count_inc();
pagefault_disabled_inc();
/*
* make sure to have issued the store before a pagefault
@@ -47,11 +40,6 @@ static inline void pagefault_enable(void
*/
barrier();
pagefault_disabled_dec();
-#ifndef CONFIG_PREEMPT
- preempt_count_dec();
-#else
- preempt_enable();
-#endif
}

/*
@@ -1,40 +0,0 @@
From: Yong Zhang <yong.zhang at windriver.com>
Date: Thu, 29 Jan 2015 12:56:18 -0600
Subject: ARM: cmpxchg: define __HAVE_ARCH_CMPXCHG for armv6 and later
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Both pi_stress and sigwaittest in rt-test show performance gain with
__HAVE_ARCH_CMPXCHG. Testing result on coretile_express_a9x4:

pi_stress -p 99 --duration=300 (on linux-3.4-rc5; bigger is better)
vanilla: Total inversion performed: 5493381
patched: Total inversion performed: 5621746

sigwaittest -p 99 -l 100000 (on linux-3.4-rc5-rt6; less is better)
3.4-rc5-rt6: Min 24, Cur 27, Avg 30, Max 98
patched: Min 19, Cur 21, Avg 23, Max 96

Signed-off-by: Yong Zhang <yong.zhang0 at gmail.com>
Cc: Russell King <rmk+kernel at arm.linux.org.uk>
Cc: Nicolas Pitre <nico at linaro.org>
Cc: Will Deacon <will.deacon at arm.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Thomas Gleixner <tglx at linutronix.de>
Cc: linux-arm-kernel at lists.infradead.org

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/include/asm/cmpxchg.h | 2 ++
1 file changed, 2 insertions(+)

--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -129,6 +129,8 @@ static inline unsigned long __xchg(unsig

#else /* min ARCH >= ARMv6 */

+#define __HAVE_ARCH_CMPXCHG 1
+
extern void __bad_cmpxchg(volatile void *ptr, int size);

/*

@@ -1,86 +0,0 @@
From: "Yadi.hu" <yadi.hu@windriver.com>
Date: Wed, 10 Dec 2014 10:32:09 +0800
Subject: ARM: enable irq in translation/section permission fault handlers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Probably happens on all ARM, with
CONFIG_PREEMPT_RT_FULL
CONFIG_DEBUG_ATOMIC_SLEEP

This simple program....

int main() {
*((char*)0xc0001000) = 0;
};

[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a
[ 512.743217] INFO: lockdep is turned off.
[ 512.743360] irq event stamp: 0
[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null)
[ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744303] softirqs last disabled at (0): [< (null)>] (null)
[ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104)
[ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24)
[ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0)
[ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c)
[ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0)
[ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c)
[ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8)
[ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94)
[ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48)
[ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0)
[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8)

0xc0000000 belongs to the kernel address space; a user task cannot be
allowed to access it. Under this condition, the correct result is that the
test case receives a "segmentation fault" and exits, rather than producing
the splat above.

The root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in
prefetch/data abort handlers"): it deletes the irq enable block from the
data abort assembly code and moves it into the page/breakpoint/alignment
fault handlers instead, but does not enable irqs in the translation/section
permission fault handlers. ARM disables irqs when it enters exception/
interrupt mode, so if the kernel does not re-enable them, they stay
disabled during translation/section permission faults.

We see the above splat because do_force_sig_info is still called with
IRQs off, and that code eventually does a:

spin_lock_irqsave(&t->sighand->siglock, flags);

As this is architecture independent code, and we've not seen any other
need for other arch to have the siglock converted to raw lock, we can
conclude that we should enable irq for ARM translation/section
permission exception.

Signed-off-by: Yadi.hu <yadi.hu@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mm/fault.c | 6 ++++++
1 file changed, 6 insertions(+)

--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);

+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;

@@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}

@@ -1,27 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 11 Jun 2015 14:17:06 +0200
Subject: ASoC: Intel: sst: use ; instead of , at the end of a C statement
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This was spotted by Fernando Lopez-Lezcano <nando@ccrma.Stanford.EDU>
while he tried to compile a -RT kernel with this driver enabled.
"make C=2" would also warn about this. This is based on his patch.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
sound/soc/intel/atom/sst/sst.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

--- a/sound/soc/intel/atom/sst/sst.c
+++ b/sound/soc/intel/atom/sst/sst.c
@@ -368,8 +368,8 @@ static inline void sst_restore_shim64(st
* initialize by FW or driver when firmware is loaded
*/
spin_lock_irqsave(&ctx->ipc_spin_lock, irq_flags);
- sst_shim_write64(shim, SST_IMRX, shim_regs->imrx),
- sst_shim_write64(shim, SST_CSR, shim_regs->csr),
+ sst_shim_write64(shim, SST_IMRX, shim_regs->imrx);
+ sst_shim_write64(shim, SST_CSR, shim_regs->csr);
spin_unlock_irqrestore(&ctx->ipc_spin_lock, irq_flags);
}

@@ -1,77 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 21 Mar 2013 19:01:05 +0100
Subject: printk: Drop the logbuf_lock more often
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The lock is held with irqs off. The latency drops by 500us+ on my arm box
with a "full" buffer after executing "dmesg" on the shell.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1162,6 +1162,7 @@ static int syslog_print_all(char __user
{
char *text;
int len = 0;
+ int attempts = 0;

text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
if (!text)
@@ -1173,7 +1174,14 @@ static int syslog_print_all(char __user
u64 seq;
u32 idx;
enum log_flags prev;
-
+ int num_msg;
+try_again:
+ attempts++;
+ if (attempts > 10) {
+ len = -EBUSY;
+ goto out;
+ }
+ num_msg = 0;
if (clear_seq < log_first_seq) {
/* messages are gone, move to first available one */
clear_seq = log_first_seq;
@@ -1194,6 +1202,14 @@ static int syslog_print_all(char __user
prev = msg->flags;
idx = log_next(idx);
seq++;
+ num_msg++;
+ if (num_msg > 5) {
+ num_msg = 0;
+ raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
+ if (clear_seq < log_first_seq)
+ goto try_again;
+ }
}

/* move first record forward until length fits into the buffer */
@@ -1207,6 +1223,14 @@ static int syslog_print_all(char __user
prev = msg->flags;
idx = log_next(idx);
seq++;
+ num_msg++;
+ if (num_msg > 5) {
+ num_msg = 0;
+ raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
+ if (clear_seq < log_first_seq)
+ goto try_again;
+ }
}

/* last message fitting into this dump */
@@ -1247,6 +1271,7 @@ static int syslog_print_all(char __user
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}
+out:
raw_spin_unlock_irq(&logbuf_lock);

kfree(text);

@@ -1,101 +0,0 @@
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 8 Apr 2015 20:33:25 -0300
Subject: KVM: lapic: mark LAPIC timer handler as irqsafe
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Since the lapic timer handler only wakes up a simple waitqueue,
it can be executed from hardirq context.

Also handle the case where hrtimer_start_expires fails due to -ETIME,
by injecting the interrupt to the guest immediately.

Reduces average cyclictest latency by 3us.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/kvm/lapic.c | 40 +++++++++++++++++++++++++++++++++++++---
1 file changed, 37 insertions(+), 3 deletions(-)

--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1167,8 +1167,36 @@ void wait_lapic_expire(struct kvm_vcpu *
__delay(tsc_deadline - guest_tsc);
}

+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data);
+
+static void __apic_timer_expired(struct hrtimer *data)
+{
+ int ret, i = 0;
+ enum hrtimer_restart r;
+ struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+
+ r = apic_timer_fn(data);
+
+ if (r == HRTIMER_RESTART) {
+ do {
+ ret = hrtimer_start_expires(data, HRTIMER_MODE_ABS);
+ if (ret == -ETIME)
+ hrtimer_add_expires_ns(&ktimer->timer,
+ ktimer->period);
+ i++;
+ } while (ret == -ETIME && i < 10);
+
+ if (ret == -ETIME) {
+ printk_once(KERN_ERR "%s: failed to reprogram timer\n",
+ __func__);
+ WARN_ON_ONCE(1);
+ }
+ }
+}
+
static void start_apic_timer(struct kvm_lapic *apic)
{
+ int ret;
ktime_t now;

atomic_set(&apic->lapic_timer.pending, 0);
@@ -1199,9 +1227,11 @@ static void start_apic_timer(struct kvm_
}
}

- hrtimer_start(&apic->lapic_timer.timer,
+ ret = hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, apic->lapic_timer.period),
HRTIMER_MODE_ABS);
+ if (ret == -ETIME)
+ __apic_timer_expired(&apic->lapic_timer.timer);

apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
PRIx64 ", "
@@ -1233,8 +1263,10 @@ static void start_apic_timer(struct kvm_
do_div(ns, this_tsc_khz);
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
+ ret = hrtimer_start(&apic->lapic_timer.timer,
expire, HRTIMER_MODE_ABS);
+ if (ret == -ETIME)
+ __apic_timer_expired(&apic->lapic_timer.timer);
} else
apic_timer_expired(apic);

@@ -1707,6 +1739,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
apic->lapic_timer.timer.function = apic_timer_fn;
+ apic->lapic_timer.timer.irqsafe = 1;

/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
@@ -1834,7 +1867,8 @@ void __kvm_migrate_apic_timer(struct kvm

timer = &vcpu->arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ if (hrtimer_start_expires(timer, HRTIMER_MODE_ABS) == -ETIME)
+ __apic_timer_expired(timer);
}

/*

@@ -1,342 +0,0 @@
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 8 Apr 2015 20:33:24 -0300
Subject: KVM: use simple waitqueue for vcpu->wq
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The problem:

On -RT, an emulated LAPIC timer instance has the following path:

1) hard interrupt
2) ksoftirqd is scheduled
3) ksoftirqd wakes up vcpu thread
4) vcpu thread is scheduled

This extra context switch introduces unnecessary latency in the
LAPIC path for a KVM guest.

The solution:

Allow waking up vcpu thread from hardirq context,
thus avoiding the need for ksoftirqd to be scheduled.

Normal waitqueues make use of spinlocks, which on -RT
are sleepable locks. Therefore, waking up a waitqueue
waiter involves locking a sleeping lock, which
is not allowed from hard interrupt context.

cyclictest command line:
# cyclictest -m -n -q -p99 -l 1000000 -h60 -D 1m

This patch reduces the average latency in my tests from 14us to 11us.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/kvm/arm.c | 4 ++--
arch/arm/kvm/psci.c | 4 ++--
arch/powerpc/include/asm/kvm_host.h | 4 ++--
arch/powerpc/kvm/book3s_hv.c | 23 +++++++++++------------
arch/s390/include/asm/kvm_host.h | 2 +-
arch/s390/kvm/interrupt.c | 8 ++++----
arch/x86/kvm/lapic.c | 6 +++---
include/linux/kvm_host.h | 4 ++--
virt/kvm/async_pf.c | 4 ++--
virt/kvm/kvm_main.c | 16 ++++++++--------
10 files changed, 37 insertions(+), 38 deletions(-)

--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -474,9 +474,9 @@ bool kvm_arch_intc_initialized(struct kv

static void vcpu_pause(struct kvm_vcpu *vcpu)
{
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_head *wq = kvm_arch_vcpu_wq(vcpu);

- wait_event_interruptible(*wq, !vcpu->arch.pause);
+ swait_event_interruptible(*wq, !vcpu->arch.pause);
}

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -68,7 +68,7 @@ static unsigned long kvm_psci_vcpu_on(st
{
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
- wait_queue_head_t *wq;
+ struct swait_head *wq;
unsigned long cpu_id;
unsigned long context_id;
phys_addr_t target_pc;
@@ -117,7 +117,7 @@ static unsigned long kvm_psci_vcpu_on(st
smp_mb(); /* Make sure the above is visible */

wq = kvm_arch_vcpu_wq(vcpu);
- wake_up_interruptible(wq);
+ swait_wake_interruptible(wq);

return PSCI_RET_SUCCESS;
}
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -280,7 +280,7 @@ struct kvmppc_vcore {
u8 in_guest;
struct list_head runnable_threads;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
@@ -613,7 +613,7 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;

- wait_queue_head_t *wqp;
+ struct swait_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -115,11 +115,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ struct swait_head *wqp;

wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swaitqueue_active(wqp)) {
+ swait_wake_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}

@@ -686,8 +686,8 @@ int kvmppc_pseries_do_hcall(struct kvm_v
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq)) {
+ swait_wake_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -1426,7 +1426,7 @@ static struct kvmppc_vcore *kvmppc_vcore
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
@@ -2073,10 +2073,9 @@ static void kvmppc_vcore_blocked(struct
{
struct kvm_vcpu *vcpu;
int do_sleep = 1;
+ DEFINE_SWAITER(wait);

- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);

/*
* Check one last time for pending exceptions and ceded state after
@@ -2090,7 +2089,7 @@ static void kvmppc_vcore_blocked(struct
}

if (!do_sleep) {
- finish_wait(&vc->wq, &wait);
+ swait_finish(&vc->wq, &wait);
return;
}

@@ -2098,7 +2097,7 @@ static void kvmppc_vcore_blocked(struct
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ swait_finish(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -2142,7 +2141,7 @@ static int kvmppc_run_vcpu(struct kvm_ru
kvmppc_start_thread(vcpu);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swait_wake(&vc->wq);
}

}
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -419,7 +419,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- wait_queue_head_t *wq;
+ struct swait_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -875,13 +875,13 @@ int kvm_s390_handle_wait(struct kvm_vcpu

void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (waitqueue_active(&vcpu->wq)) {
+ if (swaitqueue_active(&vcpu->wq)) {
/*
* The vcpu gave up the cpu voluntarily, mark it as a good
* yield-candidate.
*/
vcpu->preempted = true;
- wake_up_interruptible(&vcpu->wq);
+ swait_wake_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -987,7 +987,7 @@ int kvm_s390_inject_program_int(struct k
spin_lock(&li->lock);
irq.u.pgm.code = code;
__inject_prog(vcpu, &irq);
- BUG_ON(waitqueue_active(li->wq));
+ BUG_ON(swaitqueue_active(li->wq));
spin_unlock(&li->lock);
return 0;
}
@@ -1006,7 +1006,7 @@ int kvm_s390_inject_prog_irq(struct kvm_
spin_lock(&li->lock);
irq.u.pgm = *pgm_info;
rc = __inject_prog(vcpu, &irq);
- BUG_ON(waitqueue_active(li->wq));
+ BUG_ON(swaitqueue_active(li->wq));
spin_unlock(&li->lock);
return rc;
}
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1104,7 +1104,7 @@ static void apic_update_lvtt(struct kvm_
static void apic_timer_expired(struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu = apic->vcpu;
- wait_queue_head_t *q = &vcpu->wq;
+ struct swait_head *q = &vcpu->wq;
struct kvm_timer *ktimer = &apic->lapic_timer;

if (atomic_read(&apic->lapic_timer.pending))
@@ -1113,8 +1113,8 @@ static void apic_timer_expired(struct kv
atomic_inc(&apic->lapic_timer.pending);
kvm_set_pending_timer(vcpu);

- if (waitqueue_active(q))
- wake_up_interruptible(q);
+ if (swaitqueue_active(q))
+ swait_wake_interruptible(q);

if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -230,7 +230,7 @@ struct kvm_vcpu {

int fpu_active;
int guest_fpu_loaded, guest_xcr0_loaded;
- wait_queue_head_t wq;
+ struct swait_head wq;
struct pid *pid;
int sigset_active;
sigset_t sigset;
@@ -690,7 +690,7 @@ static inline bool kvm_arch_has_noncoher
}
#endif

-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
return vcpu->arch.wqp;
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -94,8 +94,8 @@ static void async_pf_execute(struct work

trace_kvm_async_pf_completed(addr, gva);

- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq))
+ swait_wake_interruptible(&vcpu->wq);

mmput(mm);
kvm_put_kvm(vcpu->kvm);
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -218,7 +218,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu,
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_waitqueue_head(&vcpu->wq);
+ init_swait_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);

page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -1779,7 +1779,7 @@ static int kvm_vcpu_check_block(struct k
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
ktime_t start, cur;
- DEFINE_WAIT(wait);
+ DEFINE_SWAITER(wait);
bool waited = false;

start = cur = ktime_get();
@@ -1800,7 +1800,7 @@ void kvm_vcpu_block(struct kvm_vcp
}

for (;;) {
- prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -1809,7 +1809,7 @@ void kvm_vcpu_block(struct kvm_vcp
schedule();
}

- finish_wait(&vcpu->wq, &wait);
+ swait_finish(&vcpu->wq, &wait);
cur = ktime_get();

out:
@@ -1825,11 +1825,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu
{
int me;
int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ struct swait_head *wqp;

wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swaitqueue_active(wqp)) {
+ swait_wake_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}

@@ -1930,7 +1930,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+ if (swaitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;

@@ -1,174 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 13 Feb 2013 09:26:05 -0500
Subject: acpi/rt: Convert acpi_gbl_hardware lock back to a raw_spinlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

We hit the following bug with 3.6-rt:

[ 5.898990] BUG: scheduling while atomic: swapper/3/0/0x00000002
[ 5.898991] no locks held by swapper/3/0.
[ 5.898993] Modules linked in:
[ 5.898996] Pid: 0, comm: swapper/3 Not tainted 3.6.11-rt28.19.el6rt.x86_64.debug #1
[ 5.898997] Call Trace:
[ 5.899011] [<ffffffff810804e7>] __schedule_bug+0x67/0x90
[ 5.899028] [<ffffffff81577923>] __schedule+0x793/0x7a0
[ 5.899032] [<ffffffff810b4e40>] ? debug_rt_mutex_print_deadlock+0x50/0x200
[ 5.899034] [<ffffffff81577b89>] schedule+0x29/0x70
[ 5.899036] BUG: scheduling while atomic: swapper/7/0/0x00000002
[ 5.899037] no locks held by swapper/7/0.
[ 5.899039] [<ffffffff81578525>] rt_spin_lock_slowlock+0xe5/0x2f0
[ 5.899040] Modules linked in:
[ 5.899041]
[ 5.899045] [<ffffffff81579a58>] ? _raw_spin_unlock_irqrestore+0x38/0x90
[ 5.899046] Pid: 0, comm: swapper/7 Not tainted 3.6.11-rt28.19.el6rt.x86_64.debug #1
[ 5.899047] Call Trace:
[ 5.899049] [<ffffffff81578bc6>] rt_spin_lock+0x16/0x40
[ 5.899052] [<ffffffff810804e7>] __schedule_bug+0x67/0x90
[ 5.899054] [<ffffffff8157d3f0>] ? notifier_call_chain+0x80/0x80
[ 5.899056] [<ffffffff81577923>] __schedule+0x793/0x7a0
[ 5.899059] [<ffffffff812f2034>] acpi_os_acquire_lock+0x1f/0x23
[ 5.899062] [<ffffffff810b4e40>] ? debug_rt_mutex_print_deadlock+0x50/0x200
[ 5.899068] [<ffffffff8130be64>] acpi_write_bit_register+0x33/0xb0
[ 5.899071] [<ffffffff81577b89>] schedule+0x29/0x70
[ 5.899072] [<ffffffff8130be13>] ? acpi_read_bit_register+0x33/0x51
[ 5.899074] [<ffffffff81578525>] rt_spin_lock_slowlock+0xe5/0x2f0
[ 5.899077] [<ffffffff8131d1fc>] acpi_idle_enter_bm+0x8a/0x28e
[ 5.899079] [<ffffffff81579a58>] ? _raw_spin_unlock_irqrestore+0x38/0x90
[ 5.899081] [<ffffffff8107e5da>] ? this_cpu_load+0x1a/0x30
[ 5.899083] [<ffffffff81578bc6>] rt_spin_lock+0x16/0x40
[ 5.899087] [<ffffffff8144c759>] cpuidle_enter+0x19/0x20
[ 5.899088] [<ffffffff8157d3f0>] ? notifier_call_chain+0x80/0x80
[ 5.899090] [<ffffffff8144c777>] cpuidle_enter_state+0x17/0x50
[ 5.899092] [<ffffffff812f2034>] acpi_os_acquire_lock+0x1f/0x23
[ 5.899094] [<ffffffff8144d1a1>] cpuidle899101] [<ffffffff8130be13>] ?

As the acpi code disables interrupts in acpi_idle_enter_bm, and calls
code that grabs the acpi lock, it causes issues, as the lock is
currently a sleeping lock in RT.

The lock was converted from a raw to a sleeping lock due to some
previous issues, and tests that showed it didn't seem to matter.
Unfortunately, it did matter for one of our boxes.

This patch converts the lock back to a raw lock. I've run this code on a
few of my own machines, one being my laptop that uses the acpi quite
extensively. I've been able to suspend and resume without issues.

[ tglx: Made the change exclusive for acpi_gbl_hardware_lock ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: John Kacur <jkacur@gmail.com>
Cc: Clark Williams <clark@redhat.com>
Link: http://lkml.kernel.org/r/1360765565.23152.5.camel@gandalf.local.home

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/acpi/acpica/acglobal.h | 2 +-
drivers/acpi/acpica/hwregs.c | 4 ++--
drivers/acpi/acpica/hwxface.c | 4 ++--
drivers/acpi/acpica/utmutex.c | 4 ++--
include/acpi/platform/aclinux.h | 15 +++++++++++++++
5 files changed, 22 insertions(+), 7 deletions(-)

--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -112,7 +112,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pen
* interrupt level
*/
ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
-ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
+ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);

/* Mutex for _OSI support */
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -269,14 +269,14 @@ acpi_status acpi_hw_clear_acpi_status(vo
ACPI_BITMASK_ALL_FIXED_STATUS,
ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));

- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+ raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);

/* Clear the fixed events in PM1 A/B */

status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
ACPI_BITMASK_ALL_FIXED_STATUS);

- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+ raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);

if (ACPI_FAILURE(status)) {
goto exit;
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -374,7 +374,7 @@ acpi_status acpi_write_bit_register(u32
return_ACPI_STATUS(AE_BAD_PARAMETER);
}

- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+ raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);

/*
* At this point, we know that the parent register is one of the
@@ -435,7 +435,7 @@ acpi_status acpi_write_bit_register(u32

unlock_and_exit:

- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+ raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
return_ACPI_STATUS(status);
}

--- a/drivers/acpi/acpica/utmutex.c
+++ b/drivers/acpi/acpica/utmutex.c
@@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(voi
return_ACPI_STATUS (status);
}

- status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
+ status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
if (ACPI_FAILURE (status)) {
return_ACPI_STATUS (status);
}
@@ -141,7 +141,7 @@ void acpi_ut_mutex_terminate(void)
/* Delete the spinlocks */

acpi_os_delete_lock(acpi_gbl_gpe_lock);
- acpi_os_delete_lock(acpi_gbl_hardware_lock);
+ acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
acpi_os_delete_lock(acpi_gbl_reference_count_lock);

/* Delete the reader/writer lock */
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -123,6 +123,7 @@

#define acpi_cache_t struct kmem_cache
#define acpi_spinlock spinlock_t *
+#define acpi_raw_spinlock raw_spinlock_t *
#define acpi_cpu_flags unsigned long

/* Use native linux version of acpi_os_allocate_zeroed */
@@ -141,6 +142,20 @@
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock

+#define acpi_os_create_raw_lock(__handle) \
+({ \
+ raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
+ \
+ if (lock) { \
+ *(__handle) = lock; \
+ raw_spin_lock_init(*(__handle)); \
+ } \
+ lock ? AE_OK : AE_NO_MEMORY; \
+ })
+
+#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
+
+
/*
* OSL interfaces used by debugger/disassembler
*/

@@ -1,104 +0,0 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Thu, 14 May 2015 17:52:17 +0200
Subject: arch/arm64: Add lazy preempt support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

arm64 is missing support for PREEMPT_RT. The main feature which is
lacking is support for lazy preemption. The arch-specific entry code,
thread information structure definitions, and associated data tables
have to be extended to provide this support. Then the Kconfig file has
to be extended to indicate the support is available, and also to
indicate that support for full RT preemption is now available.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/asm-offsets.c | 1 +
arch/arm64/kernel/entry.S | 13 ++++++++++---
4 files changed, 15 insertions(+), 3 deletions(-)

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -69,6 +69,7 @@ config ARM64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
+ select HAVE_PREEMPT_LAZY
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -47,6 +47,7 @@ struct thread_info {
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
int cpu; /* cpu */
};

@@ -101,6 +102,7 @@ static inline struct thread_info *curren
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
+#define TIF_NEED_RESCHED_LAZY 4
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -117,6 +119,7 @@ static inline struct thread_info *curren
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -35,6 +35,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -367,11 +367,16 @@ ENDPROC(el1_sync)
#ifdef CONFIG_PREEMPT
get_thread_info tsk
ldr w24, [tsk, #TI_PREEMPT] // get preempt count
- cbnz w24, 1f // preempt count != 0
+ cbnz w24, 2f // preempt count != 0
ldr x0, [tsk, #TI_FLAGS] // get flags
- tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
- bl el1_preempt
+ tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
+
+ ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
+ cbnz w24, 2f // preempt lazy count != 0
+ tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
1:
+ bl el1_preempt
+2:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
@@ -385,6 +390,7 @@ ENDPROC(el1_irq)
1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
+ tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
ret x24
#endif

@@ -622,6 +628,7 @@ ENDPROC(cpu_switch_to)
str x0, [sp, #S_X0] // returned x0
work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched
+ tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
ldr x2, [sp, #S_PSTATE]
mov x0, sp // 'regs'

@@ -1,57 +0,0 @@
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Sat, 6 Mar 2010 17:47:10 +0100
Subject: ARM: AT91: PIT: Remove irq handler when clock event is unused
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Setup and remove the interrupt handler in clock event mode selection.
This avoids calling the (shared) interrupt handler when the device is
not used.

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: redo the patch with NR_IRQS_LEGACY which is probably required since
commit 8fe82a55 ("ARM: at91: sparse irq support") which is included since v3.6.
Patch based on what Sami Pietikäinen <Sami.Pietikainen@wapice.com> suggested].
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/timer-atmel-pit.c | 4 ++++
drivers/clocksource/timer-atmel-st.c | 1 +
2 files changed, 5 insertions(+)

--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -90,6 +90,7 @@ static cycle_t read_pit_clk(struct clock
return elapsed;
}

+static struct irqaction at91sam926x_pit_irq;
/*
* Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
*/
@@ -100,6 +101,8 @@ pit_clkevt_mode(enum clock_event_mode mo

switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
+ /* Set up irq handler */
+ setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
/* update clocksource counter */
data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
pit_write(data->base, AT91_PIT_MR,
@@ -113,6 +116,7 @@ pit_clkevt_mode(enum clock_event_mode mo
/* disable irq, leaving the clocksource active */
pit_write(data->base, AT91_PIT_MR,
(data->cycle - 1) | AT91_PIT_PITEN);
+ remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
break;
case CLOCK_EVT_MODE_RESUME:
break;
--- a/drivers/clocksource/timer-atmel-st.c
+++ b/drivers/clocksource/timer-atmel-st.c
@@ -131,6 +131,7 @@ clkevt32k_mode(enum clock_event_mode mod
break;
case CLOCK_EVT_MODE_SHUTDOWN:
case CLOCK_EVT_MODE_UNUSED:
+ remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
case CLOCK_EVT_MODE_RESUME:
irqmask = 0;
break;

@@ -1,33 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 1 May 2010 18:29:35 +0200
Subject: ARM: at91: tclib: Default to tclib timer for RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

RT is not too happy about the shared timer interrupt in AT91
devices. Default to tclib timer for RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
drivers/misc/Kconfig | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -54,6 +54,7 @@ config AD525X_DPOT_SPI
config ATMEL_TCLIB
bool "Atmel AT32/AT91 Timer/Counter Library"
depends on (AVR32 || ARCH_AT91)
+ default y if PREEMPT_RT_FULL
help
Select this if you want a library to allocate the Timer/Counter
blocks found on many Atmel processors. This facilitates using
@@ -86,7 +87,7 @@ config ATMEL_TCB_CLKSRC_BLOCK
config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
bool "TC Block use 32 KiHz clock"
depends on ATMEL_TCB_CLKSRC
- default y
+ default y if !PREEMPT_RT_FULL
help
Select this to use 32 KiHz base clock rate as TC block clock
source for clock events.

@@ -1,466 +0,0 @@
From: Frank Rowand <frank.rowand@am.sony.com>
Date: Mon, 19 Sep 2011 14:51:14 -0700
Subject: arm: Convert arm boot_lock to raw
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The arm boot_lock is used by the secondary processor startup code. The locking
task is the idle thread, which has idle->sched_class == &idle_sched_class.
idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
lock, the attempt to wake it when the lock becomes available will fail:

try_to_wake_up()
...
activate_task()
enqueue_task()
p->sched_class->enqueue_task(rq, p, flags)

Fix by converting boot_lock to a raw spin lock.

Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/mach-exynos/platsmp.c | 12 ++++++------
arch/arm/mach-hisi/platmcpm.c | 26 +++++++++++++-------------
arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
arch/arm/mach-prima2/platsmp.c | 10 +++++-----
arch/arm/mach-qcom/platsmp.c | 10 +++++-----
arch/arm/mach-spear/platsmp.c | 10 +++++-----
arch/arm/mach-sti/platsmp.c | 10 +++++-----
arch/arm/mach-ux500/platsmp.c | 10 +++++-----
arch/arm/plat-versatile/platsmp.c | 10 +++++-----
9 files changed, 54 insertions(+), 54 deletions(-)

--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -231,7 +231,7 @@ static void __iomem *scu_base_addr(void)
return (void __iomem *)(S5P_VA_SCU);
}

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void exynos_secondary_init(unsigned int cpu)
{
@@ -244,8 +244,8 @@ static void exynos_secondary_init(unsign
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -259,7 +259,7 @@ static int exynos_boot_secondary(unsigne
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* The secondary processor is waiting to be released from
@@ -286,7 +286,7 @@ static int exynos_boot_secondary(unsigne

if (timeout == 0) {
printk(KERN_ERR "cpu1 power enable failed");
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return -ETIMEDOUT;
}
}
@@ -342,7 +342,7 @@ static int exynos_boot_secondary(unsigne
* calibrations, then wait for it to finish
*/
fail:
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? ret : 0;
}
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -57,7 +57,7 @@

static void __iomem *sysctrl, *fabric;
static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static u32 fabric_phys_addr;
/*
* [0]: bootwrapper physical address
@@ -104,7 +104,7 @@ static int hip04_mcpm_power_up(unsigned
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
return -EINVAL;

- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);

if (hip04_cpu_table[cluster][cpu])
goto out;
@@ -133,7 +133,7 @@ static int hip04_mcpm_power_up(unsigned
udelay(20);
out:
hip04_cpu_table[cluster][cpu]++;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);

return 0;
}
@@ -149,7 +149,7 @@ static void hip04_mcpm_power_down(void)

__mcpm_cpu_going_down(cpu, cluster);

- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
hip04_cpu_table[cluster][cpu]--;
if (hip04_cpu_table[cluster][cpu] == 1) {
@@ -162,7 +162,7 @@ static void hip04_mcpm_power_down(void)

last_man = hip04_cluster_is_down(cluster);
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
/* Since it's Cortex A15, disable L2 prefetching. */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3 \n\t"
@@ -173,7 +173,7 @@ static void hip04_mcpm_power_down(void)
hip04_set_snoop_filter(cluster, 0);
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
} else {
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
v7_exit_coherency_flush(louis);
}

@@ -192,7 +192,7 @@ static int hip04_mcpm_wait_for_powerdown
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);

count = TIMEOUT_MSEC / POLL_MSEC;
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
for (tries = 0; tries < count; tries++) {
if (hip04_cpu_table[cluster][cpu]) {
ret = -EBUSY;
@@ -202,10 +202,10 @@ static int hip04_mcpm_wait_for_powerdown
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
if (data & CORE_WFI_STATUS(cpu))
break;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
/* Wait for clean L2 when the whole cluster is down. */
msleep(POLL_MSEC);
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
}
if (tries >= count)
goto err;
@@ -220,10 +220,10 @@ static int hip04_mcpm_wait_for_powerdown
}
if (tries >= count)
goto err;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 0;
err:
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return ret;
}

@@ -235,10 +235,10 @@ static void hip04_mcpm_powered_up(void)
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
if (!hip04_cpu_table[cluster][cpu])
hip04_cpu_table[cluster][cpu] = 1;
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -43,7 +43,7 @@
/* SCU base address */
static void __iomem *scu_base;

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

void __iomem *omap4_get_scu_base(void)
{
@@ -74,8 +74,8 @@ static void omap4_secondary_init(unsigne
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -89,7 +89,7 @@ static int omap4_boot_secondary(unsigned
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* Update the AuxCoreBoot0 with boot state for secondary core.
@@ -166,7 +166,7 @@ static int omap4_boot_secondary(unsigned
* Now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return 0;
}
--- a/arch/arm/mach-prima2/platsmp.c
+++ b/arch/arm/mach-prima2/platsmp.c
@@ -22,7 +22,7 @@

static void __iomem *clk_base;

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void sirfsoc_secondary_init(unsigned int cpu)
{
@@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsig
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static const struct of_device_id clk_ids[] = {
@@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsign
/* make sure write buffer is drained */
mb();

- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* The secondary processor is waiting to be released from
@@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsign
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/mach-qcom/platsmp.c
+++ b/arch/arm/mach-qcom/platsmp.c
@@ -46,7 +46,7 @@

extern void secondary_startup_arm(void);

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

#ifdef CONFIG_HOTPLUG_CPU
static void __ref qcom_cpu_die(unsigned int cpu)
@@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int scss_release_secondary(unsigned int cpu)
@@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* Send the secondary CPU a soft interrupt, thereby causing
@@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return ret;
}
--- a/arch/arm/mach-spear/platsmp.c
+++ b/arch/arm/mach-spear/platsmp.c
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void __iomem *scu_base = IOMEM(VA_SCU_BASE);

@@ -47,8 +47,8 @@ static void spear13xx_secondary_init(uns
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsi
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* The secondary processor is waiting to be released from
@@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsi
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/mach-sti/platsmp.c
+++ b/arch/arm/mach-sti/platsmp.c
@@ -34,7 +34,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void sti_secondary_init(unsigned int cpu)
{
@@ -49,8 +49,8 @@ static void sti_secondary_init(unsigned
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -61,7 +61,7 @@ static int sti_boot_secondary(unsigned i
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* The secondary processor is waiting to be released from
@@ -92,7 +92,7 @@ static int sti_boot_secondary(unsigned i
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/mach-ux500/platsmp.c
+++ b/arch/arm/mach-ux500/platsmp.c
@@ -51,7 +51,7 @@ static void __iomem *scu_base_addr(void)
return NULL;
}

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void ux500_secondary_init(unsigned int cpu)
{
@@ -64,8 +64,8 @@ static void ux500_secondary_init(unsigne
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -76,7 +76,7 @@ static int ux500_boot_secondary(unsigned
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* The secondary processor is waiting to be released from
@@ -97,7 +97,7 @@ static int ux500_boot_secondary(unsigned
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/plat-versatile/platsmp.c
+++ b/arch/arm/plat-versatile/platsmp.c
@@ -30,7 +30,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

void versatile_secondary_init(unsigned int cpu)
{
@@ -43,8 +43,8 @@ void versatile_secondary_init(unsigned i
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -55,7 +55,7 @@ int versatile_boot_secondary(unsigned in
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
* This is really belt and braces; we hold unintended secondary
@@ -85,7 +85,7 @@ int versatile_boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? -ENOSYS : 0;
}

@@ -1,149 +0,0 @@
Subject: arm: Enable highmem for rt
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Feb 2013 11:03:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

fixup highmem for ARM.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/include/asm/switch_to.h | 8 ++++++
arch/arm/mm/highmem.c | 46 ++++++++++++++++++++++++++++++++++-----
include/linux/highmem.h | 1
3 files changed, 50 insertions(+), 5 deletions(-)

--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -3,6 +3,13 @@

#include <linux/thread_info.h>

+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
/*
* For v7 SMP cores running a preemptible kernel we may be pre-empted
* during a TLB maintenance operation, so execute an inner-shareable dsb
@@ -22,6 +29,7 @@ extern struct task_struct *__switch_to(s

#define switch_to(prev,next,last) \
do { \
+ switch_kmaps(prev, next); \
last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
} while (0)

--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -54,12 +54,13 @@ EXPORT_SYMBOL(kunmap);

void *kmap_atomic(struct page *page)
{
+ pte_t pte = mk_pte(page, kmap_prot);
unsigned int idx;
unsigned long vaddr;
void *kmap;
int type;

- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -93,7 +94,10 @@ void *kmap_atomic(struct page *page)
* in place, so the contained TLB flush ensures the TLB is updated
* with the new mapping.
*/
- set_fixmap_pte(idx, mk_pte(page, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_fixmap_pte(idx, pte);

return (void *)vaddr;
}
@@ -110,6 +114,9 @@ void __kunmap_atomic(void *kvaddr)

if (cache_is_vivt())
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr != __fix_to_virt(idx));
#else
@@ -122,17 +129,18 @@ void __kunmap_atomic(void *kvaddr)
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
- preempt_enable();
+ preempt_enable_nort();
}
EXPORT_SYMBOL(__kunmap_atomic);

void *kmap_atomic_pfn(unsigned long pfn)
{
+ pte_t pte = pfn_pte(pfn, kmap_prot);
unsigned long vaddr;
int idx, type;
struct page *page = pfn_to_page(pfn);

- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -143,7 +151,10 @@ void *kmap_atomic_pfn(unsigned long pfn)
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
#endif
- set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_fixmap_pte(idx, pte);

return (void *)vaddr;
}
@@ -157,3 +168,28 @@ struct page *kmap_atomic_to_page(const v

return pte_page(get_fixmap_pte(vaddr));
}
+
+#if defined CONFIG_PREEMPT_RT_FULL
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ int i;
+
+ /*
+ * Clear @prev's kmap_atomic mappings
+ */
+ for (i = 0; i < prev_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+
+ set_fixmap_pte(idx, __pte(0));
+ }
+ /*
+ * Restore @next_p's kmap_atomic mappings
+ */
+ for (i = 0; i < next_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+
+ if (!pte_none(next_p->kmap_pte[i]))
+ set_fixmap_pte(idx, next_p->kmap_pte[i]);
+ }
+}
+#endif
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
+#include <linux/sched.h>

#include <asm/cacheflush.h>
@@ -1,28 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 11 Mar 2013 21:37:27 +0100
Subject: arm/highmem: Flush tlb on unmap
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The TLB should be flushed on unmap, making the mapping entry
invalid. Currently this is done only in the non-debug case, which does
not look right.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mm/highmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -112,10 +112,10 @@ void __kunmap_atomic(void *kvaddr)
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr != __fix_to_virt(idx));
- set_fixmap_pte(idx, __pte(0));
#else
(void) idx; /* to kill a warning */
#endif
+ set_fixmap_pte(idx, __pte(0));
kmap_atomic_idx_pop();
} else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
/* this address was obtained through kmap_high_get() */
@@ -1,106 +0,0 @@
Subject: arm: Add support for lazy preemption
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 31 Oct 2012 12:04:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Implement the ARM pieces for lazy preemption.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/Kconfig | 1 +
arch/arm/include/asm/thread_info.h | 3 +++
arch/arm/kernel/asm-offsets.c | 1 +
arch/arm/kernel/entry-armv.S | 13 +++++++++++--
arch/arm/kernel/signal.c | 3 ++-
5 files changed, 18 insertions(+), 3 deletions(-)

--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -66,6 +66,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_SYSCALL_TRACEPOINTS
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -50,6 +50,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
@@ -147,6 +148,7 @@ extern int vfp_restore_user_hwstate(stru
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_NEED_RESCHED_LAZY 3
#define TIF_UPROBE 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -65,6 +65,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -208,11 +208,18 @@ ENDPROC(__dabt_svc)
#ifdef CONFIG_PREEMPT
get_thread_info tsk
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exception
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif

svc_exit r5, irq = 1 @ return from exception
@@ -227,6 +234,8 @@ ENDPROC(__irq_svc)
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
b 1b
#endif
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -563,7 +563,8 @@ asmlinkage int
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
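The entry-armv.S hunk above is dense; read as pseudo-C, the svc-exit decision it implements looks roughly like the sketch below (an illustration only; the lazy-count accessor is a hypothetical stand-in for reading the TI_PREEMPT_LAZY field):

    /* A hard NEED_RESCHED preempts whenever the preempt count is zero;
     * NEED_RESCHED_LAZY additionally requires the lazy count to be zero. */
    if (preempt_count() == 0) {
            if (test_thread_flag(TIF_NEED_RESCHED))
                    svc_preempt();
            else if (preempt_lazy_count() == 0 &&   /* hypothetical accessor */
                     test_thread_flag(TIF_NEED_RESCHED_LAZY))
                    svc_preempt();
    }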
@@ -1,84 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Sep 2013 14:31:54 +0200
Subject: arm/unwind: use a raw_spin_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Mostly, unwinding is done with irqs enabled; however, SLUB may call it
with irqs disabled while creating a new SLUB cache.

I had a system freeze while loading a module which called
kmem_cache_create() on init. That means SLUB's __slab_alloc() disabled
interrupts and then

->new_slab_objects()
->new_slab()
->setup_object()
->setup_object_debug()
->init_tracking()
->set_track()
->save_stack_trace()
->save_stack_trace_tsk()
->walk_stackframe()
->unwind_frame()
->unwind_find_idx()
=>spin_lock_irqsave(&unwind_lock);

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/kernel/unwind.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)

--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -93,7 +93,7 @@ extern const struct unwind_idx __start_u
static const struct unwind_idx *__origin_unwind_idx;
extern const struct unwind_idx __stop_unwind_idx[];

-static DEFINE_SPINLOCK(unwind_lock);
+static DEFINE_RAW_SPINLOCK(unwind_lock);
static LIST_HEAD(unwind_tables);

/* Convert a prel31 symbol to an absolute address */
@@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_f
/* module unwind tables */
struct unwind_table *table;

- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_for_each_entry(table, &unwind_tables, list) {
if (addr >= table->begin_addr &&
addr < table->end_addr) {
@@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_f
break;
}
}
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
}

pr_debug("%s: idx = %p\n", __func__, idx);
@@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(un
tab->begin_addr = text_addr;
tab->end_addr = text_addr + text_size;

- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_add_tail(&tab->list, &unwind_tables);
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);

return tab;
}
@@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_tabl
if (!tab)
return;

- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_del(&tab->list);
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);

kfree(tab);
}
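The same lock conversion recurs throughout these deleted patches, so a minimal sketch of what it buys may help (assuming PREEMPT_RT semantics, where spinlock_t becomes a sleeping rtmutex while raw_spinlock_t keeps truly spinning; the names below are made up for illustration):

    static DEFINE_RAW_SPINLOCK(example_lock);       /* hypothetical lock */

    static void example_irqs_off_path(void)         /* hypothetical caller */
    {
            unsigned long flags;

            /* Safe even when the caller already disabled interrupts, on
             * both RT and !RT kernels, because it never sleeps. */
            raw_spin_lock_irqsave(&example_lock, flags);
            /* short, non-sleeping critical section */
            raw_spin_unlock_irqrestore(&example_lock, flags);
    }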
@@ -1,65 +0,0 @@
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 3 Jul 2009 08:44:29 -0500
Subject: ata: Do not disable interrupts in ide code for preempt-rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Use the local_irq_*_nort variants.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
drivers/ata/libata-sff.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(str
unsigned long flags;
unsigned int consumed;

- local_irq_save(flags);
+ local_irq_save_nort(flags);
consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);

return consumed;
}
@@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_qu
unsigned long flags;

/* FIXME: use a bounce buffer */
- local_irq_save(flags);
+ local_irq_save_nort(flags);
buf = kmap_atomic(page);

/* do the actual data transfer */
@@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_qu
do_write);

kunmap_atomic(buf);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
} else {
buf = page_address(page);
ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
@@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_
unsigned long flags;

/* FIXME: use bounce buffer */
- local_irq_save(flags);
+ local_irq_save_nort(flags);
buf = kmap_atomic(page);

/* do the actual data transfer */
@@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_
count, rw);

kunmap_atomic(buf);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
} else {
buf = page_address(page);
consumed = ap->ops->sff_data_xfer(dev, buf + offset,
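For reference, the *_nort convention used above can be sketched as follows (these helpers exist only in the RT patch set: on !RT kernels they are the plain local_irq_save()/local_irq_restore(), on RT they compile away so the section stays preemptible):

    unsigned long flags;
    void *buf;

    local_irq_save_nort(flags);     /* real irq-off on !RT, no-op on RT */
    buf = kmap_atomic(page);
    /* PIO transfer that must not be interrupted on !RT */
    kunmap_atomic(buf);
    local_irq_restore_nort(flags);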
@@ -1,84 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 3 May 2014 11:00:29 +0200
Subject: blk-mq: revert raw locks, postpone notifier to POST_DEAD
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The blk_mq_cpu_notify_lock should be raw because some CPU down levels
are called with interrupts off. The notifier itself currently calls one
function, blk_mq_hctx_notify().
That function acquires ctx->lock, which is a sleeping lock, and I would
prefer to keep it that way. The function only moves IO requests from
the CPU that is going offline to another CPU, and it is currently the
only notifier. Therefore I revert the list lock back to a sleeping
spinlock and let the notifier run at POST_DEAD time.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq-cpu.c | 17 ++++++++++-------
block/blk-mq.c | 2 +-
2 files changed, 11 insertions(+), 8 deletions(-)

--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -16,7 +16,7 @@
#include "blk-mq.h"

static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
+static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);

static int blk_mq_main_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
@@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct
struct blk_mq_cpu_notifier *notify;
int ret = NOTIFY_OK;

- raw_spin_lock(&blk_mq_cpu_notify_lock);
+ if (action != CPU_POST_DEAD)
+ return NOTIFY_OK;
+
+ spin_lock(&blk_mq_cpu_notify_lock);

list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
ret = notify->notify(notify->data, action, cpu);
@@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct
break;
}

- raw_spin_unlock(&blk_mq_cpu_notify_lock);
+ spin_unlock(&blk_mq_cpu_notify_lock);
return ret;
}

@@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct
{
BUG_ON(!notifier->notify);

- raw_spin_lock(&blk_mq_cpu_notify_lock);
+ spin_lock(&blk_mq_cpu_notify_lock);
list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
- raw_spin_unlock(&blk_mq_cpu_notify_lock);
+ spin_unlock(&blk_mq_cpu_notify_lock);
}

void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
{
- raw_spin_lock(&blk_mq_cpu_notify_lock);
+ spin_lock(&blk_mq_cpu_notify_lock);
list_del(&notifier->list);
- raw_spin_unlock(&blk_mq_cpu_notify_lock);
+ spin_unlock(&blk_mq_cpu_notify_lock);
}

void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1612,7 +1612,7 @@ static int blk_mq_hctx_notify(void *data
{
struct blk_mq_hw_ctx *hctx = data;

- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+ if (action == CPU_POST_DEAD)
return blk_mq_hctx_cpu_offline(hctx, cpu);

/*
@@ -1,115 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 13 Feb 2015 11:01:26 +0100
Subject: block: blk-mq: Use swait
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
| in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6
| 5 locks held by kworker/u257:6/255:
| #0: ("events_unbound"){.+.+.+}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #1: ((&entry->work)){+.+.+.}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #2: (&shost->scan_mutex){+.+.+.}, at: [<ffffffffa000faa3>] __scsi_add_device+0xa3/0x130 [scsi_mod]
| #3: (&set->tag_list_lock){+.+...}, at: [<ffffffff812f09fa>] blk_mq_init_queue+0x96a/0xa50
| #4: (rcu_read_lock_sched){......}, at: [<ffffffff8132887d>] percpu_ref_kill_and_confirm+0x1d/0x120
| Preemption disabled at:[<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|
| CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1
| Workqueue: events_unbound async_run_entry_fn
| 0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000
| 0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28
| ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000
| Call Trace:
| [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
| [<ffffffff8109aa16>] __might_sleep+0x116/0x190
| [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
| [<ffffffff810b6089>] __wake_up+0x29/0x60
| [<ffffffff812ee06e>] blk_mq_usage_counter_release+0x1e/0x20
| [<ffffffff81328966>] percpu_ref_kill_and_confirm+0x106/0x120
| [<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
| [<ffffffff812f0000>] blk_mq_update_tag_set_depth+0x40/0xd0
| [<ffffffff812f0a1c>] blk_mq_init_queue+0x98c/0xa50
| [<ffffffffa000dcf0>] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod]
| [<ffffffffa000ea35>] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod]
| [<ffffffffa000f494>] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod]
| [<ffffffffa000fb26>] __scsi_add_device+0x126/0x130 [scsi_mod]
| [<ffffffffa013033f>] ata_scsi_scan_host+0xaf/0x200 [libata]
| [<ffffffffa012b5b6>] async_port_probe+0x46/0x60 [libata]
| [<ffffffff810978fb>] async_run_entry_fn+0x3b/0xf0
| [<ffffffff8108ee81>] process_one_work+0x201/0x5e0

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-core.c | 2 +-
block/blk-mq.c | 10 +++++-----
include/linux/blkdev.h | 2 +-
3 files changed, 7 insertions(+), 7 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -664,7 +664,7 @@ struct request_queue *blk_alloc_queue_no
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);

- init_waitqueue_head(&q->mq_freeze_wq);
+ init_swait_head(&q->mq_freeze_wq);

if (blkcg_init_queue(q))
goto fail_bdi;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -88,7 +88,7 @@ static int blk_mq_queue_enter(struct req
if (!(gfp & __GFP_WAIT))
return -EBUSY;

- ret = wait_event_interruptible(q->mq_freeze_wq,
+ ret = swait_event_interruptible(q->mq_freeze_wq,
!q->mq_freeze_depth || blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
@@ -107,7 +107,7 @@ static void blk_mq_usage_counter_release
struct request_queue *q =
container_of(ref, struct request_queue, mq_usage_counter);

- wake_up_all(&q->mq_freeze_wq);
+ swait_wake_all(&q->mq_freeze_wq);
}

void blk_mq_freeze_queue_start(struct request_queue *q)
@@ -127,7 +127,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_st

static void blk_mq_freeze_queue_wait(struct request_queue *q)
{
- wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
+ swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
}

/*
@@ -151,7 +151,7 @@ void blk_mq_unfreeze_queue(struct reques
spin_unlock_irq(q->queue_lock);
if (wake) {
percpu_ref_reinit(&q->mq_usage_counter);
- wake_up_all(&q->mq_freeze_wq);
+ swait_wake_all(&q->mq_freeze_wq);
}
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
@@ -170,7 +170,7 @@ void blk_mq_wake_waiters(struct request_
* dying, we need to ensure that processes currently waiting on
* the queue are notified as well.
*/
- wake_up_all(&q->mq_freeze_wq);
+ swait_wake_all(&q->mq_freeze_wq);
}

bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -483,7 +483,7 @@ struct request_queue {
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
- wait_queue_head_t mq_freeze_wq;
+ struct swait_head mq_freeze_wq;
struct percpu_ref mq_usage_counter;
struct list_head all_q_node;
@@ -1,102 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 29 Jan 2015 15:10:08 +0100
Subject: block/mq: don't complete requests via IPI
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The IPI runs in hardirq context, and the completion code takes sleeping
locks. This patch moves the completion into a workqueue.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-core.c | 3 +++
block/blk-mq.c | 20 ++++++++++++++++++++
include/linux/blk-mq.h | 1 +
include/linux/blkdev.h | 1 +
4 files changed, 25 insertions(+)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -100,6 +100,9 @@ void blk_rq_init(struct request_queue *q

INIT_LIST_HEAD(&rq->queuelist);
INIT_LIST_HEAD(&rq->timeout_list);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
+#endif
rq->cpu = -1;
rq->q = q;
rq->__sector = (sector_t) -1;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -217,6 +217,9 @@ static void blk_mq_rq_ctx_init(struct re
rq->resid_len = 0;
rq->sense = NULL;

+#ifdef CONFIG_PREEMPT_RT_FULL
+ INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
+#endif
INIT_LIST_HEAD(&rq->timeout_list);
rq->timeout = 0;

@@ -346,6 +349,17 @@ void blk_mq_end_request(struct request *
}
EXPORT_SYMBOL(blk_mq_end_request);

+#ifdef CONFIG_PREEMPT_RT_FULL
+
+void __blk_mq_complete_request_remote_work(struct work_struct *work)
+{
+ struct request *rq = container_of(work, struct request, work);
+
+ rq->q->softirq_done_fn(rq);
+}
+
+#else
+
static void __blk_mq_complete_request_remote(void *data)
{
struct request *rq = data;
@@ -353,6 +367,8 @@ static void __blk_mq_complete_request_re
rq->q->softirq_done_fn(rq);
}

+#endif
+
static void blk_mq_ipi_complete_request(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
@@ -369,10 +385,14 @@ static void blk_mq_ipi_complete_request(
shared = cpus_share_cache(cpu, ctx->cpu);

if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ schedule_work_on(ctx->cpu, &rq->work);
+#else
rq->csd.func = __blk_mq_complete_request_remote;
rq->csd.info = rq;
rq->csd.flags = 0;
smp_call_function_single_async(ctx->cpu, &rq->csd);
+#endif
} else {
rq->q->softirq_done_fn(rq);
}
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -202,6 +202,7 @@ static inline u16 blk_mq_unique_tag_to_t

struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
+void __blk_mq_complete_request_remote_work(struct work_struct *work);

int blk_mq_request_started(struct request *rq);
void blk_mq_start_request(struct request *rq);
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -101,6 +101,7 @@ struct request {
struct list_head queuelist;
union {
struct call_single_data csd;
+ struct work_struct work;
unsigned long fifo_time;
};
@@ -1,125 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 18 Feb 2015 18:37:26 +0100
Subject: block/mq: drop per ctx cpu_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

While converting get_cpu() to get_cpu_light() I added a CPU lock to
ensure the same code is not invoked twice on the same CPU. And now I run
into this:

| kernel BUG at kernel/locking/rtmutex.c:996!
| invalid opcode: 0000 [#1] PREEMPT SMP
| CPU0: 13 PID: 75 Comm: kworker/u258:0 Tainted: G I 3.18.7-rt1.5+ #12
| Workqueue: writeback bdi_writeback_workfn (flush-8:0)
| task: ffff88023742a620 ti: ffff88023743c000 task.ti: ffff88023743c000
| RIP: 0010:[<ffffffff81523cc0>] [<ffffffff81523cc0>] rt_spin_lock_slowlock+0x280/0x2d0
| Call Trace:
| [<ffffffff815254e7>] rt_spin_lock+0x27/0x60
taking the same lock again
|
| [<ffffffff8127c771>] blk_mq_insert_requests+0x51/0x130
| [<ffffffff8127d4a9>] blk_mq_flush_plug_list+0x129/0x140
| [<ffffffff81272461>] blk_flush_plug_list+0xd1/0x250
| [<ffffffff81522075>] schedule+0x75/0xa0
| [<ffffffff8152474d>] do_nanosleep+0xdd/0x180
| [<ffffffff810c8312>] __hrtimer_nanosleep+0xd2/0x1c0
| [<ffffffff810c8456>] cpu_chill+0x56/0x80
| [<ffffffff8107c13d>] try_to_grab_pending+0x1bd/0x390
| [<ffffffff8107c431>] cancel_delayed_work+0x21/0x170
| [<ffffffff81279a98>] blk_mq_stop_hw_queue+0x18/0x40
| [<ffffffffa000ac6f>] scsi_queue_rq+0x7f/0x830 [scsi_mod]
| [<ffffffff8127b0de>] __blk_mq_run_hw_queue+0x1ee/0x360
| [<ffffffff8127b528>] blk_mq_map_request+0x108/0x190
take the lock ^^^
|
| [<ffffffff8127c8d2>] blk_sq_make_request+0x82/0x350
| [<ffffffff8126f6c0>] generic_make_request+0xd0/0x120
| [<ffffffff8126f788>] submit_bio+0x78/0x190
| [<ffffffff811bd537>] _submit_bh+0x117/0x180
| [<ffffffff811bf528>] __block_write_full_page.constprop.38+0x138/0x3f0
| [<ffffffff811bf880>] block_write_full_page+0xa0/0xe0
| [<ffffffff811c02b3>] blkdev_writepage+0x13/0x20
| [<ffffffff81127b25>] __writepage+0x15/0x40
| [<ffffffff8112873b>] write_cache_pages+0x1fb/0x440
| [<ffffffff811289be>] generic_writepages+0x3e/0x60
| [<ffffffff8112a17c>] do_writepages+0x1c/0x30
| [<ffffffff811b3603>] __writeback_single_inode+0x33/0x140
| [<ffffffff811b462d>] writeback_sb_inodes+0x2bd/0x490
| [<ffffffff811b4897>] __writeback_inodes_wb+0x97/0xd0
| [<ffffffff811b4a9b>] wb_writeback+0x1cb/0x210
| [<ffffffff811b505b>] bdi_writeback_workfn+0x25b/0x380
| [<ffffffff8107b50b>] process_one_work+0x1bb/0x490
| [<ffffffff8107c7ab>] worker_thread+0x6b/0x4f0
| [<ffffffff81081863>] kthread+0xe3/0x100
| [<ffffffff8152627c>] ret_from_fork+0x7c/0xb0

After looking at this for a while, it seems that it is safe if blk_mq_ctx is
used multiple times; the lock in the struct protects the access.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq.c | 4 ----
block/blk-mq.h | 8 --------
2 files changed, 12 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1386,9 +1386,7 @@ static void blk_sq_make_request(struct r
if (list_empty(&plug->mq_list))
trace_block_plug(q);
else if (request_count >= BLK_MAX_REQUEST_COUNT) {
- spin_unlock(&data.ctx->cpu_lock);
blk_flush_plug_list(plug, false);
- spin_lock(&data.ctx->cpu_lock);
trace_block_plug(q);
}
list_add_tail(&rq->queuelist, &plug->mq_list);
@@ -1581,7 +1579,6 @@ static int blk_mq_hctx_cpu_offline(struc
blk_mq_hctx_clear_pending(hctx, ctx);
}
spin_unlock(&ctx->lock);
- __blk_mq_put_ctx(ctx);

if (list_empty(&tmp))
return NOTIFY_OK;
@@ -1775,7 +1772,6 @@ static void blk_mq_init_cpu_queues(struc
memset(__ctx, 0, sizeof(*__ctx));
__ctx->cpu = i;
spin_lock_init(&__ctx->lock);
- spin_lock_init(&__ctx->cpu_lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -9,7 +9,6 @@ struct blk_mq_ctx {
struct list_head rq_list;
} ____cacheline_aligned_in_smp;

- spinlock_t cpu_lock;
unsigned int cpu;
unsigned int index_hw;

@@ -80,7 +79,6 @@ static inline struct blk_mq_ctx *__blk_m
struct blk_mq_ctx *ctx;

ctx = per_cpu_ptr(q->queue_ctx, cpu);
- spin_lock(&ctx->cpu_lock);
return ctx;
}

@@ -95,14 +93,8 @@ static inline struct blk_mq_ctx *blk_mq_
return __blk_mq_get_ctx(q, get_cpu_light());
}

-static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
-{
- spin_unlock(&ctx->cpu_lock);
-}
-
static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
- __blk_mq_put_ctx(ctx);
put_cpu_light();
}
@@ -1,52 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: block/mq: do not invoke preempt_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

preempt_disable() and get_cpu() don't play well together with the sleeping
locks the code tries to acquire later.
It seems to be enough to replace it with get_cpu_light() and migrate_disable().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -364,7 +364,7 @@ static void blk_mq_ipi_complete_request(
return;
}

- cpu = get_cpu();
+ cpu = get_cpu_light();
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
shared = cpus_share_cache(cpu, ctx->cpu);

@@ -376,7 +376,7 @@ static void blk_mq_ipi_complete_request(
} else {
rq->q->softirq_done_fn(rq);
}
- put_cpu();
+ put_cpu_light();
}

void __blk_mq_complete_request(struct request *rq)
@@ -905,14 +905,14 @@ void blk_mq_run_hw_queue(struct blk_mq_h
return;

if (!async) {
- int cpu = get_cpu();
+ int cpu = get_cpu_light();
if (cpumask_test_cpu(cpu, hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
- put_cpu();
+ put_cpu_light();
return;
}

- put_cpu();
+ put_cpu_light();
}

kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
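A minimal sketch of the get_cpu_light() substitution that this patch and its neighbours rely on (RT-tree helpers: on !RT they behave like get_cpu()/put_cpu(), on RT they disable only migration, so the section may still take sleeping locks; the per-CPU structure below is hypothetical):

    struct example_pcp {                    /* hypothetical per-CPU data */
            spinlock_t lock;
            unsigned long count;
    };
    static DEFINE_PER_CPU(struct example_pcp, example_pcp);

    int cpu = get_cpu_light();              /* stable CPU id, still preemptible on RT */
    struct example_pcp *p = &per_cpu(example_pcp, cpu);

    spin_lock(&p->lock);                    /* sleeping lock: legal here on RT */
    p->count++;
    spin_unlock(&p->lock);

    put_cpu_light();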
@@ -1,90 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 9 Apr 2014 10:37:23 +0200
Subject: block: mq: use cpu_light()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

There is a might-sleep splat because get_cpu() disables preemption and
later we grab a lock. As a workaround for this we use get_cpu_light()
and an additional lock to prevent taking the same ctx.

There is already a lock member in the ctx, but some functions do ++
on that member; this works with irqs off, but on RT we would need the
extra lock.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq.c | 4 ++++
block/blk-mq.h | 17 ++++++++++++++---
2 files changed, 18 insertions(+), 3 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1366,7 +1366,9 @@ static void blk_sq_make_request(struct r
if (list_empty(&plug->mq_list))
trace_block_plug(q);
else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+ spin_unlock(&data.ctx->cpu_lock);
blk_flush_plug_list(plug, false);
+ spin_lock(&data.ctx->cpu_lock);
trace_block_plug(q);
}
list_add_tail(&rq->queuelist, &plug->mq_list);
@@ -1559,6 +1561,7 @@ static int blk_mq_hctx_cpu_offline(struc
blk_mq_hctx_clear_pending(hctx, ctx);
}
spin_unlock(&ctx->lock);
+ __blk_mq_put_ctx(ctx);

if (list_empty(&tmp))
return NOTIFY_OK;
@@ -1752,6 +1755,7 @@ static void blk_mq_init_cpu_queues(struc
memset(__ctx, 0, sizeof(*__ctx));
__ctx->cpu = i;
spin_lock_init(&__ctx->lock);
+ spin_lock_init(&__ctx->cpu_lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -9,6 +9,7 @@ struct blk_mq_ctx {
struct list_head rq_list;
} ____cacheline_aligned_in_smp;

+ spinlock_t cpu_lock;
unsigned int cpu;
unsigned int index_hw;

@@ -76,7 +77,11 @@ struct blk_align_bitmap {
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
unsigned int cpu)
{
- return per_cpu_ptr(q->queue_ctx, cpu);
+ struct blk_mq_ctx *ctx;
+
+ ctx = per_cpu_ptr(q->queue_ctx, cpu);
+ spin_lock(&ctx->cpu_lock);
+ return ctx;
}

/*
@@ -87,12 +92,18 @@ static inline struct blk_mq_ctx *__blk_m
*/
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
- return __blk_mq_get_ctx(q, get_cpu());
+ return __blk_mq_get_ctx(q, get_cpu_light());
+}
+
+static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+{
+ spin_unlock(&ctx->cpu_lock);
}

static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
- put_cpu();
+ __blk_mq_put_ctx(ctx);
+ put_cpu_light();
}

struct blk_mq_alloc_data {
@@ -1,97 +0,0 @@
Subject: block: Shorten interrupt disabled regions
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 22 Jun 2011 19:47:02 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Moving the blk_sched_flush_plug() call out of the interrupt/preempt
disabled region in the scheduler allows us to replace
local_irq_save/restore(flags) by local_irq_disable/enable() in
blk_flush_plug().

Now instead of doing this we disable interrupts explicitly when we
lock the request_queue and re-enable them when we drop the lock. That
allows interrupts to be handled when the plug list contains requests
for more than one queue.

Aside from that, this change makes the scope of the irq disabled region
more obvious. The current code confused the hell out of me when
looking at:

local_irq_save(flags);
spin_lock(q->queue_lock);
...
queue_unplugged(q...);
scsi_request_fn();
spin_unlock(q->queue_lock);
spin_lock(shost->host_lock);
spin_unlock_irq(shost->host_lock);

-------------------^^^ ????

spin_lock_irq(q->queue_lock);
spin_unlock(q->lock);
local_irq_restore(flags);

Also add a comment to __blk_run_queue() documenting that
q->request_fn() can drop q->queue_lock and re-enable interrupts, but
must return with q->queue_lock held and interrupts disabled.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de
---
block/blk-core.c | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3077,7 +3077,7 @@ static void queue_unplugged(struct reque
blk_run_queue_async(q);
else
__blk_run_queue(q);
- spin_unlock(q->queue_lock);
+ spin_unlock_irq(q->queue_lock);
}

static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3125,7 +3125,6 @@ EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
- unsigned long flags;
struct request *rq;
LIST_HEAD(list);
unsigned int depth;
@@ -3145,11 +3144,6 @@ void blk_flush_plug_list(struct blk_plug
q = NULL;
depth = 0;

- /*
- * Save and disable interrupts here, to avoid doing it for every
- * queue lock we have to take.
- */
- local_irq_save(flags);
while (!list_empty(&list)) {
rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
@@ -3162,7 +3156,7 @@ void blk_flush_plug_list(struct blk_plug
queue_unplugged(q, depth, from_schedule);
q = rq->q;
depth = 0;
- spin_lock(q->queue_lock);
+ spin_lock_irq(q->queue_lock);
}

/*
@@ -3189,8 +3183,6 @@ void blk_flush_plug_list(struct blk_plug
*/
if (q)
queue_unplugged(q, depth, from_schedule);
-
- local_irq_restore(flags);
}

void blk_finish_plug(struct blk_plug *plug)
@@ -1,46 +0,0 @@
Subject: block: Use cpu_chill() for retry loops
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 20 Dec 2012 18:28:26 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Retry loops on RT might loop forever when the modifying side was
preempted. Steven also observed a livelock when concurrent priority
boosting was going on.

Use cpu_chill() instead of cpu_relax() to let the system
make progress.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
block/blk-ioc.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -7,6 +7,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
+#include <linux/delay.h>

#include "blk.h"

@@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_s
spin_unlock(q->queue_lock);
} else {
spin_unlock_irqrestore(&ioc->lock, flags);
- cpu_relax();
+ cpu_chill();
spin_lock_irqsave_nested(&ioc->lock, flags, 1);
}
}
@@ -187,7 +188,7 @@ void put_io_context_active(struct io_con
spin_unlock(icq->q->queue_lock);
} else {
spin_unlock_irqrestore(&ioc->lock, flags);
- cpu_relax();
+ cpu_chill();
goto retry;
}
}
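The resulting retry pattern can be sketched as below (cpu_chill() is an RT-tree helper that sleeps briefly instead of busy-waiting, giving a preempted or boosted lock holder a chance to run; the progress check is a hypothetical stand-in for the trylock logic in blk-ioc.c):

    retry:
            spin_lock_irqsave(&ioc->lock, flags);
            if (!made_progress(ioc)) {      /* hypothetical stand-in */
                    spin_unlock_irqrestore(&ioc->lock, flags);
                    cpu_chill();            /* cpu_relax() here could livelock on RT */
                    goto retry;
            }
            spin_unlock_irqrestore(&ioc->lock, flags);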
@@ -1,37 +0,0 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:58 -0500
Subject: bug: BUG_ON/WARN_ON variants dependent on RT/!RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Introduce RT/NON-RT WARN/BUG statements to avoid ifdefs in the code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
include/asm-generic/bug.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)

--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -206,6 +206,20 @@ extern void warn_slowpath_null(const cha
# define WARN_ON_SMP(x) ({0;})
#endif

+#ifdef CONFIG_PREEMPT_RT_BASE
+# define BUG_ON_RT(c) BUG_ON(c)
+# define BUG_ON_NONRT(c) do { } while (0)
+# define WARN_ON_RT(condition) WARN_ON(condition)
+# define WARN_ON_NONRT(condition) do { } while (0)
+# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
+#else
+# define BUG_ON_RT(c) do { } while (0)
+# define BUG_ON_NONRT(c) BUG_ON(c)
+# define WARN_ON_RT(condition) do { } while (0)
+# define WARN_ON_NONRT(condition) WARN_ON(condition)
+# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
+#endif
+
#endif /* __ASSEMBLY__ */

#endif
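Given the definitions above, usage is symmetric; for example (a sketch, with conditions chosen purely for illustration):

    /* Active only on !RT kernels; compiles away on RT, where this
     * context is legitimately preemptible: */
    WARN_ON_NONRT(!irqs_disabled());

    /* The inverse, active only on RT kernels: */
    BUG_ON_RT(in_atomic());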
@@ -1,65 +0,0 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sat, 21 Jun 2014 10:09:48 +0200
Subject: memcontrol: Prevent scheduling while atomic in cgroup code
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

mm, memcg: make refill_stock() use get_cpu_light()

Nikita reported the following memcg scheduling while atomic bug:

Call Trace:
[e22d5a90] [c0007ea8] show_stack+0x4c/0x168 (unreliable)
[e22d5ad0] [c0618c04] __schedule_bug+0x94/0xb0
[e22d5ae0] [c060b9ec] __schedule+0x530/0x550
[e22d5bf0] [c060bacc] schedule+0x30/0xbc
[e22d5c00] [c060ca24] rt_spin_lock_slowlock+0x180/0x27c
[e22d5c70] [c00b39dc] res_counter_uncharge_until+0x40/0xc4
[e22d5ca0] [c013ca88] drain_stock.isra.20+0x54/0x98
[e22d5cc0] [c01402ac] __mem_cgroup_try_charge+0x2e8/0xbac
[e22d5d70] [c01410d4] mem_cgroup_charge_common+0x3c/0x70
[e22d5d90] [c0117284] __do_fault+0x38c/0x510
[e22d5df0] [c011a5f4] handle_pte_fault+0x98/0x858
[e22d5e50] [c060ed08] do_page_fault+0x42c/0x6fc
[e22d5f40] [c000f5b4] handle_page_fault+0xc/0x80

What happens:

refill_stock()
get_cpu_var()
drain_stock()
res_counter_uncharge()
res_counter_uncharge_until()
spin_lock() <== boom

Fix it by replacing get/put_cpu_var() with get/put_cpu_light().

Reported-by: Nikita Yushchenko <nyushchenko@dev.rtsoft.ru>
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/memcontrol.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2127,14 +2127,17 @@ static void drain_local_stock(struct wor
*/
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+ struct memcg_stock_pcp *stock;
+ int cpu = get_cpu_light();
+
+ stock = &per_cpu(memcg_stock, cpu);

if (stock->cached != memcg) { /* reset if necessary */
drain_stock(stock);
stock->cached = memcg;
}
stock->nr_pages += nr_pages;
- put_cpu_var(memcg_stock);
+ put_cpu_light();
}

/*
@@ -1,87 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 13 Feb 2015 15:52:24 +0100
Subject: cgroups: use simple wait in css_release()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

To avoid:
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
|in_atomic(): 1, irqs_disabled(): 0, pid: 92, name: rcuc/11
|2 locks held by rcuc/11/92:
| #0: (rcu_callback){......}, at: [<ffffffff810e037e>] rcu_cpu_kthread+0x3de/0x940
| #1: (rcu_read_lock_sched){......}, at: [<ffffffff81328390>] percpu_ref_call_confirm_rcu+0x0/0xd0
|Preemption disabled at:[<ffffffff813284e2>] percpu_ref_switch_to_atomic_rcu+0x82/0xc0
|CPU: 11 PID: 92 Comm: rcuc/11 Not tainted 3.18.7-rt0+ #1
| ffff8802398cdf80 ffff880235f0bc28 ffffffff815b3a12 0000000000000000
| 0000000000000000 ffff880235f0bc48 ffffffff8109aa16 0000000000000000
| ffff8802398cdf80 ffff880235f0bc78 ffffffff815b8dd4 000000000000df80
|Call Trace:
| [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
| [<ffffffff8109aa16>] __might_sleep+0x116/0x190
| [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
| [<ffffffff8108d2cd>] queue_work_on+0x6d/0x1d0
| [<ffffffff8110c881>] css_release+0x81/0x90
| [<ffffffff8132844e>] percpu_ref_call_confirm_rcu+0xbe/0xd0
| [<ffffffff813284e2>] percpu_ref_switch_to_atomic_rcu+0x82/0xc0
| [<ffffffff810e03e5>] rcu_cpu_kthread+0x445/0x940
| [<ffffffff81098a2d>] smpboot_thread_fn+0x18d/0x2d0
| [<ffffffff810948d8>] kthread+0xe8/0x100
| [<ffffffff815b9c3c>] ret_from_fork+0x7c/0xb0

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/cgroup.h | 2 ++
kernel/cgroup.c | 9 +++++----
2 files changed, 7 insertions(+), 4 deletions(-)

--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -22,6 +22,7 @@
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#include <linux/wait.h>
+#include <linux/work-simple.h>

#ifdef CONFIG_CGROUPS

@@ -91,6 +92,7 @@ struct cgroup_subsys_state {
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
+ struct swork_event destroy_swork;
};

/* bits in struct cgroup_subsys_state flags field */
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4421,10 +4421,10 @@ static void css_free_rcu_fn(struct rcu_h
queue_work(cgroup_destroy_wq, &css->destroy_work);
}

-static void css_release_work_fn(struct work_struct *work)
+static void css_release_work_fn(struct swork_event *sev)
{
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ container_of(sev, struct cgroup_subsys_state, destroy_swork);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;

@@ -4463,8 +4463,8 @@ static void css_release(struct percpu_re
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);

- INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ INIT_SWORK(&css->destroy_swork, css_release_work_fn);
+ swork_queue(&css->destroy_swork);
}

static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5068,6 +5068,7 @@ static int __init cgroup_wq_init(void)
*/
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
BUG_ON(!cgroup_destroy_wq);
+ BUG_ON(swork_get());

/*
* Used to destroy pidlists and separate to serve as flush domain.
@ -1,161 +0,0 @@
|
|||
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Mon, 8 Mar 2010 18:57:04 +0100
Subject: clocksource: TCLIB: Allow higher clock rates for clock events
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As default the TCLIB uses the 32KiHz base clock rate for clock events.
Add a compile time selection to allow higher clock resulution.

(fixed up by Sami Pietikäinen <Sami.Pietikainen@wapice.com>)

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/clocksource/tcb_clksrc.c | 37 ++++++++++++++++++++++---------------
drivers/misc/Kconfig | 12 ++++++++++--
2 files changed, 32 insertions(+), 17 deletions(-)

--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -23,8 +23,7 @@
* this 32 bit free-running counter. the second channel is not used.
*
* - The third channel may be used to provide a 16-bit clockevent
- * source, used in either periodic or oneshot mode. This runs
- * at 32 KiHZ, and can handle delays of up to two seconds.
+ * source, used in either periodic or oneshot mode.
*
* A boot clocksource and clockevent source are also currently needed,
* unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
@@ -74,6 +73,7 @@ static struct clocksource clksrc = {
struct tc_clkevt_device {
struct clock_event_device clkevt;
struct clk *clk;
+ u32 freq;
void __iomem *regs;
};

@@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_cl
return container_of(clkevt, struct tc_clkevt_device, clkevt);
}

-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
- * because using one of the divided clocks would usually mean the
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
- *
- * A divided clock could be good for high resolution timers, since
- * 30.5 usec resolution can seem "low".
- */
static u32 timer_clock;

static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
@@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mod
case CLOCK_EVT_MODE_PERIODIC:
clk_enable(tcd->clk);

- /* slow clock, count up to RC, then irq and restart */
+ /* count up to RC, then irq and restart */
__raw_writel(timer_clock
| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
regs + ATMEL_TC_REG(2, CMR));
- __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
+ __raw_writel((tcd->freq + HZ / 2) / HZ,
+ tcaddr + ATMEL_TC_REG(2, RC));

/* Enable clock and interrupts on RC compare */
__raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mod
case CLOCK_EVT_MODE_ONESHOT:
clk_enable(tcd->clk);

- /* slow clock, count up to RC, then irq and stop */
+ /* count up to RC, then irq and stop */
__raw_writel(timer_clock | ATMEL_TC_CPCSTOP
| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
regs + ATMEL_TC_REG(2, CMR));
@@ -157,8 +151,12 @@ static struct tc_clkevt_device clkevt =
.name = "tc_clkevt",
.features = CLOCK_EVT_FEAT_PERIODIC
| CLOCK_EVT_FEAT_ONESHOT,
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
/* Should be lower than at91rm9200's system timer */
.rating = 125,
+#else
+ .rating = 200,
+#endif
.set_next_event = tc_next_event,
.set_mode = tc_mode,
},
@@ -178,8 +176,9 @@ static irqreturn_t ch2_irq(int irq, void
return IRQ_NONE;
}

-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
{
+ unsigned divisor = atmel_tc_divisors[divisor_idx];
int ret;
struct clk *t2_clk = tc->clk[2];
int irq = tc->irq[2];
@@ -193,7 +192,11 @@ static int __init setup_clkevents(struct
clkevt.regs = tc->regs;
clkevt.clk = t2_clk;

- timer_clock = clk32k_divisor_idx;
+ timer_clock = divisor_idx;
+ if (!divisor)
+ clkevt.freq = 32768;
+ else
+ clkevt.freq = clk_get_rate(t2_clk) / divisor;

clkevt.clkevt.cpumask = cpumask_of(0);

@@ -203,7 +206,7 @@ static int __init setup_clkevents(struct
return ret;
}

- clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
+ clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);

return ret;
}
@@ -340,7 +343,11 @@ static int __init tcb_clksrc_init(void)
goto err_disable_t1;

/* channel 2: periodic and oneshot timer support */
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
ret = setup_clkevents(tc, clk32k_divisor_idx);
+#else
+ ret = setup_clkevents(tc, best_divisor_idx);
+#endif
if (ret)
goto err_unregister_clksrc;

--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -69,8 +69,7 @@ config ATMEL_TCB_CLKSRC
are combined to make a single 32-bit timer.

When GENERIC_CLOCKEVENTS is defined, the third timer channel
- may be used as a clock event device supporting oneshot mode
- (delays of up to two seconds) based on the 32 KiHz clock.
+ may be used as a clock event device supporting oneshot mode.

config ATMEL_TCB_CLKSRC_BLOCK
int
@@ -84,6 +83,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
TC can be used for other purposes, such as PWM generation and
interval timing.

+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ bool "TC Block use 32 KiHz clock"
+ depends on ATMEL_TCB_CLKSRC
+ default y
+ help
+ Select this to use 32 KiHz base clock rate as TC block clock
+ source for clock events.
+
+
config DUMMY_IRQ
tristate "Dummy IRQ handler"
default n
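The RC value written by the patch above is a nearest-integer division, RC = (freq + HZ/2) / HZ, and the 16-bit counter caps the longest oneshot delay at 0xffff counts. The standalone C sketch below is an illustration only; it assumes HZ=100 and a hypothetical 4 MHz divided clock to make the resolution/range trade-off behind the new Kconfig option concrete.

/* Hypothetical userspace check of the RC arithmetic used above:
 * RC = (freq + HZ/2) / HZ is freq/HZ rounded to nearest, and a 16-bit
 * counter bounds the longest oneshot delay to 0xffff / freq seconds.
 */
#include <stdio.h>

#define HZ 100 /* assumed tick rate */

static void show(unsigned freq)
{
	unsigned rc = (freq + HZ / 2) / HZ;

	printf("freq %8u Hz: RC = %5u, resolution %.3f us, max delay %.3f s\n",
	       freq, rc, 1e6 / freq, 65535.0 / freq);
}

int main(void)
{
	show(32768);    /* 32 KiHz slow clock: ~30.5 us steps, ~2 s range */
	show(4000000);  /* e.g. a divided peripheral clock (hypothetical rate) */
	return 0;
}

Compiled with any C compiler, this prints roughly 30.5 us resolution with a 2 s range for the slow clock, versus sub-microsecond resolution but a much shorter range for the faster clock.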
@@ -1,225 +0,0 @@
Subject: completion: Use simple wait queues
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 11 Jan 2013 11:23:51 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Completions have no long lasting callbacks and therefor do not need
the complex waitqueue variant. Use simple waitqueues which reduces the
contention on the waitqueue lock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/wireless/orinoco/orinoco_usb.c | 2 -
drivers/usb/gadget/function/f_fs.c | 2 -
drivers/usb/gadget/legacy/inode.c | 4 +--
include/linux/completion.h | 9 +++-----
include/linux/uprobes.h | 1
kernel/sched/completion.c | 32 ++++++++++++++---------------
kernel/sched/core.c | 10 +++++++--
7 files changed, 33 insertions(+), 27 deletions(-)

--- a/drivers/net/wireless/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/orinoco/orinoco_usb.c
@@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ez
while (!ctx->done.done && msecs--)
udelay(1000);
} else {
- wait_event_interruptible(ctx->done.wait,
+ swait_event_interruptible(ctx->done.wait,
ctx->done.done);
}
break;
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1403,7 +1403,7 @@ static void ffs_data_put(struct ffs_data
pr_info("%s(): freeing\n", __func__);
ffs_data_clear(ffs);
BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
- waitqueue_active(&ffs->ep0req_completion.wait));
+ swaitqueue_active(&ffs->ep0req_completion.wait));
kfree(ffs->dev_name);
kfree(ffs);
}
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -345,7 +345,7 @@ ep_io (struct ep_data *epdata, void *buf
spin_unlock_irq (&epdata->dev->lock);

if (likely (value == 0)) {
- value = wait_event_interruptible (done.wait, done.done);
+ value = swait_event_interruptible (done.wait, done.done);
if (value != 0) {
spin_lock_irq (&epdata->dev->lock);
if (likely (epdata->ep != NULL)) {
@@ -354,7 +354,7 @@ ep_io (struct ep_data *epdata, void *buf
usb_ep_dequeue (epdata->ep, epdata->req);
spin_unlock_irq (&epdata->dev->lock);

- wait_event (done.wait, done.done);
+ swait_event (done.wait, done.done);
if (epdata->status == -ECONNRESET)
epdata->status = -EINTR;
} else {
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -7,8 +7,7 @@
* Atomic wait-for-completion handler data structures.
* See kernel/sched/completion.c for details.
*/
-
-#include <linux/wait.h>
+#include <linux/wait-simple.h>

/*
* struct completion - structure used to maintain state for a "completion"
@@ -24,11 +23,11 @@
*/
struct completion {
unsigned int done;
- wait_queue_head_t wait;
+ struct swait_head wait;
};

#define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+ { 0, SWAIT_HEAD_INITIALIZER((work).wait) }

#define COMPLETION_INITIALIZER_ONSTACK(work) \
({ init_completion(&work); work; })
@@ -73,7 +72,7 @@ struct completion {
static inline void init_completion(struct completion *x)
{
x->done = 0;
- init_waitqueue_head(&x->wait);
+ init_swait_head(&x->wait);
}

/**
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -27,6 +27,7 @@
#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>
+#include <linux/wait.h>

struct vm_area_struct;
struct mm_struct;
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -30,10 +30,10 @@ void complete(struct completion *x)
{
unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);

@@ -50,10 +50,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);

@@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_SWAITER(wait);

- __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ swait_prepare_locked(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
+ swait_finish_locked(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -89,9 +89,9 @@ static inline long __sched
{
might_sleep();

- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
return timeout;
}

@@ -277,12 +277,12 @@ bool try_wait_for_completion(struct comp
if (!READ_ONCE(x->done))
return 0;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -311,7 +311,7 @@ bool completion_done(struct completion *
* after it's acquired the lock.
*/
smp_rmb();
- spin_unlock_wait(&x->wait.lock);
+ raw_spin_unlock_wait(&x->wait.lock);
return true;
}
EXPORT_SYMBOL(completion_done);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2802,7 +2802,10 @@ void migrate_disable(void)
}

#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
#endif

if (p->migrate_disable) {
@@ -2832,7 +2835,10 @@ void migrate_enable(void)
}

#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
#endif
WARN_ON_ONCE(p->migrate_disable <= 0);
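Since wait-simple.h is not a mainline header, a reader may want a feel for the completion semantics the patch preserves: done is a counter, complete() publishes one unit and wakes a single waiter under the queue lock. Below is a hedged userspace model in plain C, with pthreads standing in for the scheduler and the raw spinlock; it is an illustration, not the kernel's swait implementation.

/* Userspace model (assumption: pthreads stand in for the scheduler) of
 * the completion semantics kept by the patch above.
 */
#include <pthread.h>
#include <stdio.h>

struct completion {
	unsigned int done;
	pthread_mutex_t lock;
	pthread_cond_t wait;
};

static void complete(struct completion *x)
{
	pthread_mutex_lock(&x->lock);
	x->done++;
	pthread_cond_signal(&x->wait);	/* wake one waiter, like nr_exclusive=1 */
	pthread_mutex_unlock(&x->lock);
}

static void wait_for_completion(struct completion *x)
{
	pthread_mutex_lock(&x->lock);
	while (!x->done)
		pthread_cond_wait(&x->wait, &x->lock);
	x->done--;
	pthread_mutex_unlock(&x->lock);
}

static void *worker(void *arg)
{
	complete(arg);
	return NULL;
}

int main(void)
{
	struct completion c = { 0, PTHREAD_MUTEX_INITIALIZER,
				PTHREAD_COND_INITIALIZER };
	pthread_t t;

	pthread_create(&t, NULL, worker, &c);
	wait_for_completion(&c);
	pthread_join(t, NULL);
	puts("completed");
	return 0;
}

Build with -lpthread. The point of the kernel change is only that the waiter list behind this interface becomes the simple, raw-lock variant.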
@@ -1,24 +0,0 @@
Subject: sched: Use the proper LOCK_OFFSET for cond_resched()
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 22:51:33 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

RT does not increment preempt count when a 'sleeping' spinlock is
locked. Update PREEMPT_LOCK_OFFSET for that case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2926,7 +2926,7 @@ extern int _cond_resched(void);

extern int __cond_resched_lock(spinlock_t *lock);

-#ifdef CONFIG_PREEMPT_COUNT
+#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
#else
#define PREEMPT_LOCK_OFFSET 0
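PREEMPT_LOCK_OFFSET expresses how much preempt count a held spinlock is expected to contribute when __cond_resched_lock() decides whether dropping the lock makes rescheduling legal; on RT a 'sleeping' spinlock contributes nothing. The toy model below is an assumption-laden sketch, not kernel code, just to make the arithmetic concrete.

/* Toy model of the offset logic: rescheduling under a lock is legal only
 * if dropping that lock brings the preempt count to zero.
 */
#include <assert.h>
#include <stdio.h>

#define PREEMPT_OFFSET 1

static int rt_full;		/* stand-in for CONFIG_PREEMPT_RT_FULL */
static int preempt_count;

static int preempt_lock_offset(void)
{
	return rt_full ? 0 : PREEMPT_OFFSET; /* RT spinlocks don't disable preemption */
}

static void spin_lock(void)   { preempt_count += preempt_lock_offset(); }
static void spin_unlock(void) { preempt_count -= preempt_lock_offset(); }

static void cond_resched_lock(void)
{
	/* mirrors the check the kernel performs with PREEMPT_LOCK_OFFSET */
	assert(preempt_count == preempt_lock_offset());
}

int main(void)
{
	for (rt_full = 0; rt_full <= 1; rt_full++) {
		spin_lock();
		cond_resched_lock();
		spin_unlock();
		printf("rt_full=%d ok, count=%d\n", rt_full, preempt_count);
	}
	return 0;
}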
@@ -1,53 +0,0 @@
Subject: sched: Take RT softirq semantics into account in cond_resched()
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jul 2011 09:56:44 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The softirq semantics work different on -RT. There is no SOFTIRQ_MASK in
the preemption counter which leads to the BUG_ON() statement in
__cond_resched_softirq(). As for -RT it is enough to perform a "normal"
schedule.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 4 ++++
kernel/sched/core.c | 2 ++
2 files changed, 6 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2937,12 +2937,16 @@ extern int __cond_resched_lock(spinlock_
__cond_resched_lock(lock); \
})

+#ifndef CONFIG_PREEMPT_RT_FULL
extern int __cond_resched_softirq(void);

#define cond_resched_softirq() ({ \
___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
__cond_resched_softirq(); \
})
+#else
+# define cond_resched_softirq() cond_resched()
+#endif

static inline void cond_resched_rcu(void)
{
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4479,6 +4479,7 @@ int __cond_resched_lock(spinlock_t *lock
}
EXPORT_SYMBOL(__cond_resched_lock);

+#ifndef CONFIG_PREEMPT_RT_FULL
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
@@ -4492,6 +4493,7 @@ int __sched __cond_resched_softirq(void)
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
+#endif

/**
* yield - yield the current processor to other threads.
@@ -1,56 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 5 Dec 2013 09:16:52 -0500
Subject: cpu hotplug: Document why PREEMPT_RT uses a spinlock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The patch:

cpu: Make hotplug.lock a "sleeping" spinlock on RT

Tasks can block on hotplug.lock in pin_current_cpu(), but their
state might be != RUNNING. So the mutex wakeup will set the state
unconditionally to RUNNING. That might cause spurious unexpected
wakeups. We could provide a state preserving mutex_lock() function,
but this is semantically backwards. So instead we convert the
hotplug.lock() to a spinlock for RT, which has the state preserving
semantics already.

Fixed a bug where the hotplug lock on PREEMPT_RT can be called after a
task set its state to TASK_UNINTERRUPTIBLE and before it called
schedule. If the hotplug_lock used a mutex, and there was contention,
the current task's state would be turned to TASK_RUNNABLE and the
schedule call will not sleep. This caused unexpected results.

Although the patch had a description of the change, the code had no
comments about it. This causes confusion to those that review the code,
and as PREEMPT_RT is held in a quilt queue and not git, it's not as easy
to see why a change was made. Even if it was in git, the code should
still have a comment for something as subtle as this.

Document the rational for using a spinlock on PREEMPT_RT in the hotplug
lock code.

Reported-by: Nicholas Mc Guire <der.herr@hofr.at>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/cpu.c | 8 ++++++++
1 file changed, 8 insertions(+)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -109,6 +109,14 @@ struct hotplug_pcp {
int grab_lock;
struct completion synced;
#ifdef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Note, on PREEMPT_RT, the hotplug lock must save the state of
+ * the task, otherwise the mutex will cause the task to fail
+ * to sleep when required. (Because it's called from migrate_disable())
+ *
+ * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
+ * state.
+ */
spinlock_t lock;
#else
struct mutex mutex;
@@ -1,131 +0,0 @@
Subject: cpu: Make hotplug.lock a "sleeping" spinlock on RT
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 02 Mar 2012 10:36:57 -0500
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Tasks can block on hotplug.lock in pin_current_cpu(), but their state
might be != RUNNING. So the mutex wakeup will set the state
unconditionally to RUNNING. That might cause spurious unexpected
wakeups. We could provide a state preserving mutex_lock() function,
but this is semantically backwards. So instead we convert the
hotplug.lock() to a spinlock for RT, which has the state preserving
semantics already.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Carsten Emde <C.Emde@osadl.org>
Cc: John Kacur <jkacur@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Clark Williams <clark.williams@gmail.com>

Link: http://lkml.kernel.org/r/1330702617.25686.265.camel@gandalf.stny.rr.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/cpu.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -59,10 +59,16 @@ static int cpu_hotplug_disabled;

static struct {
struct task_struct *active_writer;
+
/* wait queue to wake up the active_writer */
wait_queue_head_t wq;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ /* Makes the lock keep the task's state */
+ spinlock_t lock;
+#else
/* verifies that no writer will get active while readers are active */
struct mutex lock;
+#endif
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
@@ -75,12 +81,26 @@ static struct {
} cpu_hotplug = {
.active_writer = NULL,
.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
+#ifdef CONFIG_PREEMPT_RT_FULL
+ .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
+#else
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
.dep_map = {.name = "cpu_hotplug.lock" },
#endif
};

+#ifdef CONFIG_PREEMPT_RT_FULL
+# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
+# define hotplug_trylock() rt_spin_trylock(&cpu_hotplug.lock)
+# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
+#else
+# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
+# define hotplug_trylock() mutex_trylock(&cpu_hotplug.lock)
+# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
+#endif
+
/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
@@ -117,8 +137,8 @@ void pin_current_cpu(void)
return;
}
preempt_enable();
- mutex_lock(&cpu_hotplug.lock);
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_lock();
+ hotplug_unlock();
preempt_disable();
goto retry;
}
@@ -191,9 +211,9 @@ void get_online_cpus(void)
if (cpu_hotplug.active_writer == current)
return;
cpuhp_lock_acquire_read();
- mutex_lock(&cpu_hotplug.lock);
+ hotplug_lock();
atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
}
EXPORT_SYMBOL_GPL(get_online_cpus);

@@ -201,11 +221,11 @@ bool try_get_online_cpus(void)
{
if (cpu_hotplug.active_writer == current)
return true;
- if (!mutex_trylock(&cpu_hotplug.lock))
+ if (!hotplug_trylock())
return false;
cpuhp_lock_acquire_tryread();
atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
return true;
}
EXPORT_SYMBOL_GPL(try_get_online_cpus);
@@ -259,11 +279,11 @@ void cpu_hotplug_begin(void)
cpuhp_lock_acquire();

for (;;) {
- mutex_lock(&cpu_hotplug.lock);
+ hotplug_lock();
prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
if (likely(!atomic_read(&cpu_hotplug.refcount)))
break;
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
schedule();
}
finish_wait(&cpu_hotplug.wq, &wait);
@@ -272,7 +292,7 @@ void cpu_hotplug_begin(void)
void cpu_hotplug_done(void)
{
cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
cpuhp_lock_release();
}
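The reader/writer drain protocol visible in cpu_hotplug_begin() above (readers bump a refcount, the writer loops in prepare_to_wait()/schedule() until the count drains to zero) can be sketched in userspace with a condvar taking the place of the waitqueue. This is a deliberately simplified model: it omits the exclusion of new readers and the RT state-preservation subtlety the patch is actually about.

/* Userspace sketch of the writer-side drain loop (assumption: pthread
 * condvar stands in for prepare_to_wait()/schedule()/finish_wait()).
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static int refcount;

static void get_online_cpus(void)
{
	pthread_mutex_lock(&lock);
	refcount++;
	pthread_mutex_unlock(&lock);
}

static void put_online_cpus(void)
{
	pthread_mutex_lock(&lock);
	if (--refcount == 0)
		pthread_cond_signal(&wq);	/* wake the active writer */
	pthread_mutex_unlock(&lock);
}

static void cpu_hotplug_begin(void)
{
	pthread_mutex_lock(&lock);
	while (refcount)	/* mirrors the for(;;) lock/check/unlock/schedule loop */
		pthread_cond_wait(&wq, &lock);
	pthread_mutex_unlock(&lock);
}

static void *reader(void *arg)
{
	(void)arg;
	get_online_cpus();
	put_online_cpus();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, reader, NULL);
	cpu_hotplug_begin();
	pthread_join(t, NULL);
	puts("hotplug may proceed");
	return 0;
}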
@@ -1,562 +0,0 @@
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 16 Jul 2012 08:07:43 +0000
Subject: cpu/rt: Rework cpu down for PREEMPT_RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Bringing a CPU down is a pain with the PREEMPT_RT kernel because
tasks can be preempted in many more places than in non-RT. In
order to handle per_cpu variables, tasks may be pinned to a CPU
for a while, and even sleep. But these tasks need to be off the CPU
if that CPU is going down.

Several synchronization methods have been tried, but when stressed
they failed. This is a new approach.

A sync_tsk thread is still created and tasks may still block on a
lock when the CPU is going down, but how that works is a bit different.
When cpu_down() starts, it will create the sync_tsk and wait on it
to inform that current tasks that are pinned on the CPU are no longer
pinned. But new tasks that are about to be pinned will still be allowed
to do so at this time.

Then the notifiers are called. Several notifiers will bring down tasks
that will enter these locations. Some of these tasks will take locks
of other tasks that are on the CPU. If we don't let those other tasks
continue, but make them block until CPU down is done, the tasks that
the notifiers are waiting on will never complete as they are waiting
for the locks held by the tasks that are blocked.

Thus we still let the task pin the CPU until the notifiers are done.
After the notifiers run, we then make new tasks entering the pinned
CPU sections grab a mutex and wait. This mutex is now a per CPU mutex
in the hotplug_pcp descriptor.

To help things along, a new function in the scheduler code is created
called migrate_me(). This function will try to migrate the current task
off the CPU this is going down if possible. When the sync_tsk is created,
all tasks will then try to migrate off the CPU going down. There are
several cases that this wont work, but it helps in most cases.

After the notifiers are called and if a task can't migrate off but enters
the pin CPU sections, it will be forced to wait on the hotplug_pcp mutex
until the CPU down is complete. Then the scheduler will force the migration
anyway.

Also, I found that THREAD_BOUND need to also be accounted for in the
pinned CPU, and the migrate_disable no longer treats them special.
This helps fix issues with ksoftirqd and workqueue that unbind on CPU down.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
include/linux/sched.h | 7 +
kernel/cpu.c | 244 ++++++++++++++++++++++++++++++++++++++++----------
kernel/sched/core.c | 82 ++++++++++++++++
3 files changed, 285 insertions(+), 48 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2217,6 +2217,10 @@ extern void do_set_cpus_allowed(struct t

extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
+int migrate_me(void);
+void tell_sched_cpu_down_begin(int cpu);
+void tell_sched_cpu_down_done(int cpu);
+
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask)
@@ -2229,6 +2233,9 @@ static inline int set_cpus_allowed_ptr(s
return -EINVAL;
return 0;
}
+static inline int migrate_me(void) { return 0; }
+static inline void tell_sched_cpu_down_begin(int cpu) { }
+static inline void tell_sched_cpu_down_done(int cpu) { }
#endif

#ifdef CONFIG_NO_HZ_COMMON
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -59,16 +59,10 @@ static int cpu_hotplug_disabled;

static struct {
struct task_struct *active_writer;
-
/* wait queue to wake up the active_writer */
wait_queue_head_t wq;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /* Makes the lock keep the task's state */
- spinlock_t lock;
-#else
/* verifies that no writer will get active while readers are active */
struct mutex lock;
-#endif
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
@@ -80,27 +74,13 @@ static struct {
#endif
} cpu_hotplug = {
.active_writer = NULL,
- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-#ifdef CONFIG_PREEMPT_RT_FULL
- .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
-#else
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#endif
+ .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
.dep_map = {.name = "cpu_hotplug.lock" },
#endif
};

-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
-# define hotplug_trylock() rt_spin_trylock(&cpu_hotplug.lock)
-# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
-#else
-# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
-# define hotplug_trylock() mutex_trylock(&cpu_hotplug.lock)
-# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
-#endif
-
/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
@@ -108,12 +88,42 @@ static struct {
#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)

+/**
+ * hotplug_pcp - per cpu hotplug descriptor
+ * @unplug: set when pin_current_cpu() needs to sync tasks
+ * @sync_tsk: the task that waits for tasks to finish pinned sections
+ * @refcount: counter of tasks in pinned sections
+ * @grab_lock: set when the tasks entering pinned sections should wait
+ * @synced: notifier for @sync_tsk to tell cpu_down it's finished
+ * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
+ * @mutex_init: zero if the mutex hasn't been initialized yet.
+ *
+ * Although @unplug and @sync_tsk may point to the same task, the @unplug
+ * is used as a flag and still exists after @sync_tsk has exited and
+ * @sync_tsk set to NULL.
+ */
struct hotplug_pcp {
struct task_struct *unplug;
+ struct task_struct *sync_tsk;
int refcount;
+ int grab_lock;
struct completion synced;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ spinlock_t lock;
+#else
+ struct mutex mutex;
+#endif
+ int mutex_init;
};

+#ifdef CONFIG_PREEMPT_RT_FULL
+# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
+# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
+#else
+# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
+# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
+#endif
+
static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);

/**
@@ -127,18 +137,39 @@ static DEFINE_PER_CPU(struct hotplug_pcp
void pin_current_cpu(void)
{
struct hotplug_pcp *hp;
+ int force = 0;

retry:
hp = this_cpu_ptr(&hotplug_pcp);

- if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
+ if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
hp->unplug == current) {
hp->refcount++;
return;
}
- preempt_enable();
- hotplug_lock();
- hotplug_unlock();
+ if (hp->grab_lock) {
+ preempt_enable();
+ hotplug_lock(hp);
+ hotplug_unlock(hp);
+ } else {
+ preempt_enable();
+ /*
+ * Try to push this task off of this CPU.
+ */
+ if (!migrate_me()) {
+ preempt_disable();
+ hp = this_cpu_ptr(&hotplug_pcp);
+ if (!hp->grab_lock) {
+ /*
+ * Just let it continue it's already pinned
+ * or about to sleep.
+ */
+ force = 1;
+ goto retry;
+ }
+ preempt_enable();
+ }
+ }
preempt_disable();
goto retry;
}
@@ -159,26 +190,84 @@ void unpin_current_cpu(void)
wake_up_process(hp->unplug);
}

-/*
- * FIXME: Is this really correct under all circumstances ?
- */
+static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
+{
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (hp->refcount) {
+ schedule_preempt_disabled();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+}
+
static int sync_unplug_thread(void *data)
{
struct hotplug_pcp *hp = data;

preempt_disable();
hp->unplug = current;
+ wait_for_pinned_cpus(hp);
+
+ /*
+ * This thread will synchronize the cpu_down() with threads
+ * that have pinned the CPU. When the pinned CPU count reaches
+ * zero, we inform the cpu_down code to continue to the next step.
+ */
set_current_state(TASK_UNINTERRUPTIBLE);
- while (hp->refcount) {
- schedule_preempt_disabled();
+ preempt_enable();
+ complete(&hp->synced);
+
+ /*
+ * If all succeeds, the next step will need tasks to wait till
+ * the CPU is offline before continuing. To do this, the grab_lock
+ * is set and tasks going into pin_current_cpu() will block on the
+ * mutex. But we still need to wait for those that are already in
+ * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
+ * will kick this thread out.
+ */
+ while (!hp->grab_lock && !kthread_should_stop()) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ /* Make sure grab_lock is seen before we see a stale completion */
+ smp_mb();
+
+ /*
+ * Now just before cpu_down() enters stop machine, we need to make
+ * sure all tasks that are in pinned CPU sections are out, and new
+ * tasks will now grab the lock, keeping them from entering pinned
+ * CPU sections.
+ */
+ if (!kthread_should_stop()) {
+ preempt_disable();
+ wait_for_pinned_cpus(hp);
+ preempt_enable();
+ complete(&hp->synced);
+ }
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ schedule();
set_current_state(TASK_UNINTERRUPTIBLE);
}
set_current_state(TASK_RUNNING);
- preempt_enable();
- complete(&hp->synced);
+
+ /*
+ * Force this thread off this CPU as it's going down and
+ * we don't want any more work on this CPU.
+ */
+ current->flags &= ~PF_NO_SETAFFINITY;
+ do_set_cpus_allowed(current, cpu_present_mask);
+ migrate_me();
return 0;
}

+static void __cpu_unplug_sync(struct hotplug_pcp *hp)
+{
+ wake_up_process(hp->sync_tsk);
+ wait_for_completion(&hp->synced);
+}
+
/*
* Start the sync_unplug_thread on the target cpu and wait for it to
* complete.
@@ -186,23 +275,83 @@ static int sync_unplug_thread(void *data
static int cpu_unplug_begin(unsigned int cpu)
{
struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
- struct task_struct *tsk;
+ int err;
+
+ /* Protected by cpu_hotplug.lock */
+ if (!hp->mutex_init) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ spin_lock_init(&hp->lock);
+#else
+ mutex_init(&hp->mutex);
+#endif
+ hp->mutex_init = 1;
+ }
+
+ /* Inform the scheduler to migrate tasks off this CPU */
+ tell_sched_cpu_down_begin(cpu);

init_completion(&hp->synced);
- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
- if (IS_ERR(tsk))
- return (PTR_ERR(tsk));
- kthread_bind(tsk, cpu);
- wake_up_process(tsk);
- wait_for_completion(&hp->synced);
+
+ hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+ if (IS_ERR(hp->sync_tsk)) {
+ err = PTR_ERR(hp->sync_tsk);
+ hp->sync_tsk = NULL;
+ return err;
+ }
+ kthread_bind(hp->sync_tsk, cpu);
+
+ /*
+ * Wait for tasks to get out of the pinned sections,
+ * it's still OK if new tasks enter. Some CPU notifiers will
+ * wait for tasks that are going to enter these sections and
+ * we must not have them block.
+ */
+ __cpu_unplug_sync(hp);
+
return 0;
}

+static void cpu_unplug_sync(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+ init_completion(&hp->synced);
+ /* The completion needs to be initialzied before setting grab_lock */
+ smp_wmb();
+
+ /* Grab the mutex before setting grab_lock */
+ hotplug_lock(hp);
+ hp->grab_lock = 1;
+
+ /*
+ * The CPU notifiers have been completed.
+ * Wait for tasks to get out of pinned CPU sections and have new
+ * tasks block until the CPU is completely down.
+ */
+ __cpu_unplug_sync(hp);
+
+ /* All done with the sync thread */
+ kthread_stop(hp->sync_tsk);
+ hp->sync_tsk = NULL;
+}
+
static void cpu_unplug_done(unsigned int cpu)
{
struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);

hp->unplug = NULL;
+ /* Let all tasks know cpu unplug is finished before cleaning up */
+ smp_wmb();
+
+ if (hp->sync_tsk)
+ kthread_stop(hp->sync_tsk);
+
+ if (hp->grab_lock) {
+ hotplug_unlock(hp);
+ /* protected by cpu_hotplug.lock */
+ hp->grab_lock = 0;
+ }
+ tell_sched_cpu_down_done(cpu);
}

void get_online_cpus(void)
@@ -211,9 +360,9 @@ void get_online_cpus(void)
if (cpu_hotplug.active_writer == current)
return;
cpuhp_lock_acquire_read();
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
atomic_inc(&cpu_hotplug.refcount);
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);

@@ -221,11 +370,11 @@ bool try_get_online_cpus(void)
{
if (cpu_hotplug.active_writer == current)
return true;
- if (!hotplug_trylock())
+ if (!mutex_trylock(&cpu_hotplug.lock))
return false;
cpuhp_lock_acquire_tryread();
atomic_inc(&cpu_hotplug.refcount);
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
return true;
}
EXPORT_SYMBOL_GPL(try_get_online_cpus);
@@ -279,11 +428,11 @@ void cpu_hotplug_begin(void)
cpuhp_lock_acquire();

for (;;) {
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
if (likely(!atomic_read(&cpu_hotplug.refcount)))
break;
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
schedule();
}
finish_wait(&cpu_hotplug.wq, &wait);
@@ -292,7 +441,7 @@ void cpu_hotplug_begin(void)
void cpu_hotplug_done(void)
{
cpu_hotplug.active_writer = NULL;
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
cpuhp_lock_release();
}

@@ -527,6 +676,9 @@ static int __ref _cpu_down(unsigned int

smpboot_park_threads(cpu);

+ /* Notifiers are done. Don't let any more tasks pin this CPU. */
+ cpu_unplug_sync(cpu);
+
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2754,7 +2754,7 @@ void migrate_disable(void)
{
struct task_struct *p = current;

- if (in_atomic() || p->flags & PF_NO_SETAFFINITY) {
+ if (in_atomic()) {
#ifdef CONFIG_SCHED_DEBUG
p->migrate_disable_atomic++;
#endif
@@ -2787,7 +2787,7 @@ void migrate_enable(void)
unsigned long flags;
struct rq *rq;

- if (in_atomic() || p->flags & PF_NO_SETAFFINITY) {
+ if (in_atomic()) {
#ifdef CONFIG_SCHED_DEBUG
p->migrate_disable_atomic--;
#endif
@@ -4960,6 +4960,84 @@ void do_set_cpus_allowed(struct task_str
cpumask_copy(&p->cpus_allowed, new_mask);
}

+static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
+static DEFINE_MUTEX(sched_down_mutex);
+static cpumask_t sched_down_cpumask;
+
+void tell_sched_cpu_down_begin(int cpu)
+{
+ mutex_lock(&sched_down_mutex);
+ cpumask_set_cpu(cpu, &sched_down_cpumask);
+ mutex_unlock(&sched_down_mutex);
+}
+
+void tell_sched_cpu_down_done(int cpu)
+{
+ mutex_lock(&sched_down_mutex);
+ cpumask_clear_cpu(cpu, &sched_down_cpumask);
+ mutex_unlock(&sched_down_mutex);
+}
+
+/**
+ * migrate_me - try to move the current task off this cpu
+ *
+ * Used by the pin_current_cpu() code to try to get tasks
+ * to move off the current CPU as it is going down.
+ * It will only move the task if the task isn't pinned to
+ * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
+ * and the task has to be in a RUNNING state. Otherwise the
+ * movement of the task will wake it up (change its state
+ * to running) when the task did not expect it.
+ *
+ * Returns 1 if it succeeded in moving the current task
+ * 0 otherwise.
+ */
+int migrate_me(void)
+{
+ struct task_struct *p = current;
+ struct migration_arg arg;
+ struct cpumask *cpumask;
+ struct cpumask *mask;
+ unsigned long flags;
+ unsigned int dest_cpu;
+ struct rq *rq;
+
+ /*
+ * We can not migrate tasks bounded to a CPU or tasks not
+ * running. The movement of the task will wake it up.
+ */
+ if (p->flags & PF_NO_SETAFFINITY || p->state)
+ return 0;
+
+ mutex_lock(&sched_down_mutex);
+ rq = task_rq_lock(p, &flags);
+
+ cpumask = this_cpu_ptr(&sched_cpumasks);
+ mask = &p->cpus_allowed;
+
+ cpumask_andnot(cpumask, mask, &sched_down_cpumask);
+
+ if (!cpumask_weight(cpumask)) {
+ /* It's only on this CPU? */
+ task_rq_unlock(rq, p, &flags);
+ mutex_unlock(&sched_down_mutex);
+ return 0;
+ }
+
+ dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
+
+ arg.task = p;
+ arg.dest_cpu = dest_cpu;
+
+ task_rq_unlock(rq, p, &flags);
+
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+ tlb_migrate_finish(p->mm);
+ mutex_unlock(&sched_down_mutex);
+
+ return 1;
+}
+
/*
* This is how migration works:
*
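The core of migrate_me() above is mask arithmetic: remove the CPUs going down from the task's allowed mask, intersect with the active mask, and pick any remaining CPU. The standalone sketch below models that selection with 64-bit masks, which is an assumption for illustration (the kernel uses struct cpumask), and uses a GCC/Clang builtin to pick a bit.

/* Standalone model of migrate_me()'s destination-CPU choice. */
#include <stdio.h>
#include <stdint.h>

static int pick_dest_cpu(uint64_t allowed, uint64_t going_down, uint64_t active)
{
	uint64_t usable = (allowed & ~going_down) & active;

	if (!usable)
		return -1;		/* task only runs on the dying CPU: caller must block */
	return __builtin_ctzll(usable);	/* any set bit will do */
}

int main(void)
{
	/* 4 CPUs online, task allowed on {0,1}, CPU 0 going down */
	printf("dest = %d\n", pick_dest_cpu(0x3, 0x1, 0xf));	/* prints 1 */
	/* task pinned to the dying CPU alone */
	printf("dest = %d\n", pick_dest_cpu(0x1, 0x1, 0xf));	/* prints -1 */
	return 0;
}

The -1 case corresponds to the path where pin_current_cpu() must fall back to blocking on the per-CPU hotplug lock until the CPU is fully down.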
@@ -1,107 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 4 Mar 2014 12:28:32 -0500
Subject: cpu_chill: Add a UNINTERRUPTIBLE hrtimer_nanosleep
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

We hit another bug that was caused by switching cpu_chill() from
msleep() to hrtimer_nanosleep().

This time it is a livelock. The problem is that hrtimer_nanosleep()
calls schedule with the state == TASK_INTERRUPTIBLE. But these means
that if a signal is pending, the scheduler wont schedule, and will
simply change the current task state back to TASK_RUNNING. This
nullifies the whole point of cpu_chill() in the first place. That is,
if a task is spinning on a try_lock() and it preempted the owner of the
lock, if it has a signal pending, it will never give up the CPU to let
the owner of the lock run.

I made a static function __hrtimer_nanosleep() that takes a fifth
parameter "state", which determines the task state of that the
nanosleep() will be in. The normal hrtimer_nanosleep() will act the
same, but cpu_chill() will call the __hrtimer_nanosleep() directly with
the TASK_UNINTERRUPTIBLE state.

cpu_chill() only cares that the first sleep happens, and does not care
about the state of the restart schedule (in hrtimer_nanosleep_restart).


Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1746,12 +1746,13 @@ void hrtimer_init_sleeper(struct hrtimer
}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);

-static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
+static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
+ unsigned long state)
{
hrtimer_init_sleeper(t, current);

do {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(state);
hrtimer_start_expires(&t->timer, mode);
if (!hrtimer_active(&t->timer))
t->task = NULL;
@@ -1795,7 +1796,8 @@ long __sched hrtimer_nanosleep_restart(s
HRTIMER_MODE_ABS);
hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);

- if (do_nanosleep(&t, HRTIMER_MODE_ABS))
+ /* cpu_chill() does not care about restart state. */
+ if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
goto out;

rmtp = restart->nanosleep.rmtp;
@@ -1812,8 +1814,10 @@ long __sched hrtimer_nanosleep_restart(s
return ret;
}

-long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
- const enum hrtimer_mode mode, const clockid_t clockid)
+static long
+__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
+ const enum hrtimer_mode mode, const clockid_t clockid,
+ unsigned long state)
{
struct restart_block *restart;
struct hrtimer_sleeper t;
@@ -1826,7 +1830,7 @@ long hrtimer_nanosleep(struct timespec *

hrtimer_init_on_stack(&t.timer, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
- if (do_nanosleep(&t, mode))
+ if (do_nanosleep(&t, mode, state))
goto out;

/* Absolute timers do not update the rmtp value and restart: */
@@ -1853,6 +1857,12 @@ long hrtimer_nanosleep(struct timespec *
return ret;
}

+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
+ const enum hrtimer_mode mode, const clockid_t clockid)
+{
+ return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
+}
+
SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
struct timespec __user *, rmtp)
{
@@ -1879,7 +1889,8 @@ void cpu_chill(void)
unsigned int freeze_flag = current->flags & PF_NOFREEZE;

current->flags |= PF_NOFREEZE;
- hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
+ TASK_UNINTERRUPTIBLE);
if (!freeze_flag)
current->flags &= ~PF_NOFREEZE;
}
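A userspace analogue of the livelock may help: an interruptible sleep returns early whenever a signal is pending, so a back-off that must actually yield the CPU has to either sleep uninterruptibly (the kernel fix above) or retry against an absolute deadline. The sketch below shows the retry variant, which is the closest portable userspace equivalent; it is an illustration, not the kernel's cpu_chill().

/* Retry an absolute-deadline sleep until it fully elapses, so pending
 * signals cannot turn the back-off into a busy spin.
 */
#include <errno.h>
#include <stdio.h>
#include <time.h>

static void chill(long nsec)
{
	struct timespec deadline;

	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_nsec += nsec;
	if (deadline.tv_nsec >= 1000000000L) {
		deadline.tv_sec++;
		deadline.tv_nsec -= 1000000000L;
	}
	/* clock_nanosleep() returns EINTR directly when interrupted */
	while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
			       &deadline, NULL) == EINTR)
		;
}

int main(void)
{
	chill(1000000);	/* 1 ms, mirroring cpu_chill()'s tiny back-off */
	puts("chilled");
	return 0;
}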
@@ -1,53 +0,0 @@
From: Tiejun Chen <tiejun.chen@windriver.com>
Subject: cpu_down: move migrate_enable() back
Date: Thu, 7 Nov 2013 10:06:07 +0800
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Commit 08c1ab68, "hotplug-use-migrate-disable.patch", intends to
use migrate_enable()/migrate_disable() to replace that combination
of preempt_enable() and preempt_disable(), but actually in
!CONFIG_PREEMPT_RT_FULL case, migrate_enable()/migrate_disable()
are still equal to preempt_enable()/preempt_disable(). So that
followed cpu_hotplug_begin()/cpu_unplug_begin(cpu) would go schedule()
to trigger schedule_debug() like this:

_cpu_down()
|
+ migrate_disable() = preempt_disable()
|
+ cpu_hotplug_begin() or cpu_unplug_begin()
|
+ schedule()
|
+ __schedule()
|
+ preempt_disable();
|
+ __schedule_bug() is true!

So we should move migrate_enable() as the original scheme.


Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
kernel/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -668,6 +668,7 @@ static int __ref _cpu_down(unsigned int
err = -EBUSY;
goto restore_cpus;
}
+ migrate_enable();

cpu_hotplug_begin();
err = cpu_unplug_begin(cpu);
@@ -744,7 +745,6 @@ static int __ref _cpu_down(unsigned int
out_release:
cpu_unplug_done(cpu);
out_cancel:
- migrate_enable();
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
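The bug class is easy to model: when migrate_disable() is just preempt_disable(), anything that schedules before the matching enable trips the debug check. A minimal sketch follows, with an assert standing in for __schedule_bug(); it is a plain C illustration, not kernel code.

/* Toy model of schedule_debug(): scheduling with a non-zero preempt
 * count is a bug, so migrate_enable() must precede anything that sleeps.
 */
#include <assert.h>
#include <stdio.h>

static int preempt_count;

static void migrate_disable(void) { preempt_count++; } /* !RT behaviour */
static void migrate_enable(void)  { preempt_count--; }

static void schedule(void)
{
	assert(preempt_count == 0);	/* mirrors __schedule_bug() */
}

int main(void)
{
	migrate_disable();
	migrate_enable();	/* the patch moves this before the begin() calls */
	schedule();		/* stands in for cpu_hotplug_begin()'s wait loop */
	puts("ok");
	return 0;
}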
@@ -1,196 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Jul 2015 15:28:49 +0200
Subject: cpufreq: Remove cpufreq_rwsem
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

cpufreq_rwsem was introduced in commit 6eed9404ab3c4 ("cpufreq: Use
rwsem for protecting critical sections) in order to replace
try_module_get() on the cpu-freq driver. That try_module_get() worked
well until the refcount was so heavily used that module removal became
more or less impossible.

Though when looking at the various (undocumented) protection
mechanisms in that code, the randomly sprinkeled around cpufreq_rwsem
locking sites are superfluous.

The policy, which is acquired in cpufreq_cpu_get() and released in
cpufreq_cpu_put() is sufficiently protected already.

cpufreq_cpu_get(cpu)
/* Protects against concurrent driver removal */
read_lock_irqsave(&cpufreq_driver_lock, flags);
policy = per_cpu(cpufreq_cpu_data, cpu);
kobject_get(&policy->kobj);
read_unlock_irqrestore(&cpufreq_driver_lock, flags);

The reference on the policy serializes versus module unload already:

cpufreq_unregister_driver()
subsys_interface_unregister()
__cpufreq_remove_dev_finish()
per_cpu(cpufreq_cpu_data) = NULL;
cpufreq_policy_put_kobj()

If there is a reference held on the policy, i.e. obtained prior to the
unregister call, then cpufreq_policy_put_kobj() will wait until that
reference is dropped. So once subsys_interface_unregister() returns
there is no policy pointer in flight and no new reference can be
obtained. So that rwsem protection is useless.

The other usage of cpufreq_rwsem in show()/store() of the sysfs
interface is redundant as well because sysfs already does the proper
kobject_get()/put() pairs.

That leaves CPU hotplug versus module removal. The current
down_write() around the write_lock() in cpufreq_unregister_driver() is
silly at best as it protects actually nothing.

The trivial solution to this is to prevent hotplug across
cpufreq_unregister_driver completely.

[upstream: rafael/linux-pm 454d3a2500a4eb33be85dde3bfba9e5f6b5efadc]
[fixes: "cpufreq_stat_notifier_trans: No policy found" since v4.0-rt]
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/cpufreq/cpufreq.c | 35 +++--------------------------------
1 file changed, 3 insertions(+), 32 deletions(-)

--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -64,12 +64,6 @@ static inline bool has_target(void)
return cpufreq_driver->target_index || cpufreq_driver->target;
}

-/*
- * rwsem to guarantee that cpufreq driver module doesn't unload during critical
- * sections
- */
-static DECLARE_RWSEM(cpufreq_rwsem);
-
/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
unsigned int event);
@@ -215,9 +209,6 @@ struct cpufreq_policy *cpufreq_cpu_get(u
if (cpu >= nr_cpu_ids)
return NULL;

- if (!down_read_trylock(&cpufreq_rwsem))
- return NULL;
-
/* get the cpufreq driver */
read_lock_irqsave(&cpufreq_driver_lock, flags);

@@ -230,9 +221,6 @@ struct cpufreq_policy *cpufreq_cpu_get(u

read_unlock_irqrestore(&cpufreq_driver_lock, flags);

- if (!policy)
- up_read(&cpufreq_rwsem);
-
return policy;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
@@ -240,7 +228,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
void cpufreq_cpu_put(struct cpufreq_policy *policy)
{
kobject_put(&policy->kobj);
- up_read(&cpufreq_rwsem);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
@@ -765,9 +752,6 @@ static ssize_t show(struct kobject *kobj
struct freq_attr *fattr = to_attr(attr);
ssize_t ret;

- if (!down_read_trylock(&cpufreq_rwsem))
- return -EINVAL;
-
down_read(&policy->rwsem);

if (fattr->show)
@@ -776,7 +760,6 @@ static ssize_t show(struct kobject *kobj
ret = -EIO;

up_read(&policy->rwsem);
- up_read(&cpufreq_rwsem);

return ret;
}
@@ -793,9 +776,6 @@ static ssize_t store(struct kobject *kob
if (!cpu_online(policy->cpu))
goto unlock;

- if (!down_read_trylock(&cpufreq_rwsem))
- goto unlock;
-
down_write(&policy->rwsem);

if (fattr->store)
@@ -804,8 +784,6 @@ static ssize_t store(struct kobject *kob
ret = -EIO;

up_write(&policy->rwsem);
-
- up_read(&cpufreq_rwsem);
unlock:
put_online_cpus();

@@ -1117,16 +1095,12 @@ static int __cpufreq_add_dev(struct devi
if (unlikely(policy))
return 0;

- if (!down_read_trylock(&cpufreq_rwsem))
- return 0;
-
/* Check if this cpu was hot-unplugged earlier and has siblings */
read_lock_irqsave(&cpufreq_driver_lock, flags);
for_each_policy(policy) {
if (cpumask_test_cpu(cpu, policy->related_cpus)) {
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
ret = cpufreq_add_policy_cpu(policy, cpu, dev);
- up_read(&cpufreq_rwsem);
return ret;
}
}
@@ -1269,8 +1243,6 @@ static int __cpufreq_add_dev(struct devi

kobject_uevent(&policy->kobj, KOBJ_ADD);

- up_read(&cpufreq_rwsem);
-
/* Callback for handling stuff after policy is ready */
if (cpufreq_driver->ready)
cpufreq_driver->ready(policy);
@@ -1304,8 +1276,6 @@ static int __cpufreq_add_dev(struct devi
cpufreq_policy_free(policy);

nomem_out:
- up_read(&cpufreq_rwsem);
-
return ret;
}

@@ -2499,19 +2469,20 @@ int cpufreq_unregister_driver(struct cpu

pr_debug("unregistering driver %s\n", driver->name);

+ /* Protect against concurrent cpu hotplug */
+ get_online_cpus();
subsys_interface_unregister(&cpufreq_interface);
if (cpufreq_boost_supported())
cpufreq_sysfs_remove_file(&boost.attr);

unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

- down_write(&cpufreq_rwsem);
write_lock_irqsave(&cpufreq_driver_lock, flags);

cpufreq_driver = NULL;

write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- up_write(&cpufreq_rwsem);
+ put_online_cpus();

return 0;
}
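The protection argument in the commit message boils down to plain reference counting: a live reference pins the object, and teardown completes only after the last put. Below is a hedged userspace sketch of that shape; the names are illustrative, not the cpufreq API.

/* Refcount-with-release model of the policy lifetime argument above. */
#include <stdio.h>

struct policy {
	int refs;
	void (*release)(struct policy *);
};

static struct policy *policy_get(struct policy *p)
{
	if (!p)
		return NULL;	/* driver already gone: no new references */
	p->refs++;
	return p;
}

static void policy_put(struct policy *p)
{
	if (--p->refs == 0)
		p->release(p);	/* last ref dropped: unregister may finish */
}

static void released(struct policy *p)
{
	printf("policy %p released\n", (void *)p);
}

int main(void)
{
	struct policy pol = { 1, released };	/* registration holds one ref */
	struct policy *ref = policy_get(&pol);

	policy_put(ref);	/* a user of the policy drops its reference */
	policy_put(&pol);	/* models cpufreq_unregister_driver() */
	return 0;
}

Because the object cannot vanish while a reference is held, an extra rwsem around get/put adds nothing, which is the patch's point.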
@@ -1,33 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 9 Apr 2015 15:23:01 +0200
Subject: cpufreq: drop K8's driver from beeing selected
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Ralf posted a picture of a backtrace from

| powernowk8_target_fn() -> transition_frequency_fidvid() and then at the
| end:
| 932 policy = cpufreq_cpu_get(smp_processor_id());
| 933 cpufreq_cpu_put(policy);

crashing the system on -RT. I assumed that policy was a NULL pointer but
was rulled out. Since Ralf can't do any more investigations on this and
I have no machine with this, I simply switch it off.

Reported-by: Ralf Mardorf <ralf.mardorf@alice-dsl.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/cpufreq/Kconfig.x86 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -123,7 +123,7 @@ config X86_POWERNOW_K7_ACPI

config X86_POWERNOW_K8
tristate "AMD Opteron/Athlon64 PowerNow!"
- depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
+ depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
help
This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
Support for K10 and newer processors is now in acpi-cpufreq.
|
|
@@ -1,35 +0,0 @@
Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 14 Dec 2011 01:03:49 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

We can't deal with the cpumask allocations which happen in atomic
context (see arch/x86/kernel/apic/io_apic.c) on RT right now.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 2 +-
 lib/Kconfig      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -841,7 +841,7 @@ config IOMMU_HELPER
config MAXSMP
	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
	depends on X86_64 && SMP && DEBUG_KERNEL
-	select CPUMASK_OFFSTACK
+	select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
	---help---
	  Enable maximum number of CPUS and NUMA Nodes for this architecture.
	  If unsure, say N.
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -391,6 +391,7 @@ config CHECK_SIGNATURE

config CPUMASK_OFFSTACK
	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+	depends on !PREEMPT_RT_FULL
	help
	  Use dynamic allocation for cpumask_var_t, instead of putting
	  them on the stack.  This is a bit more expensive, but avoids
@@ -1,242 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 21 Feb 2014 17:24:04 +0100
Subject: crypto: Reduce preempt disabled regions, more algos
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Don Estabrook reported
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2462 migrate_enable+0x17b/0x200()
| kernel: WARNING: CPU: 3 PID: 865 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()

and his backtrace showed some crypto functions which looked fine.

The problem is the following sequence:

glue_xts_crypt_128bit()
{
	blkcipher_walk_virt();		/* normal migrate_disable() */

	glue_fpu_begin();		/* get atomic */

	while (nbytes) {
		__glue_xts_crypt_128bit();
		blkcipher_walk_done();	/* with nbytes = 0, migrate_enable()
					 * while we are atomic */
	};
	glue_fpu_end()			/* no longer atomic */
}

and this is why the counter gets out of sync and the warning is printed.
The other problem is that we are non-preemptible between
glue_fpu_begin() and glue_fpu_end() and the latency grows. To fix this,
I shorten the FPU-off region and ensure blkcipher_walk_done() is called
with preemption enabled. This might hurt the performance because we now
enable/disable the FPU state more often, but we gain lower latency and
the bug is gone.

Reported-by: Don Estabrook <don.estabrook@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/crypto/cast5_avx_glue.c | 21 +++++++++------------
 arch/x86/crypto/glue_helper.c    | 31 +++++++++++++++----------------
 2 files changed, 24 insertions(+), 28 deletions(-)

--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fp
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
@@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_de
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
@@ -104,10 +104,9 @@ static int ecb_crypt(struct blkcipher_de
		} while (nbytes >= bsize);

done:
+		cast5_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, walk, nbytes);
	}
-
-	cast5_fpu_end(fpu_enabled);
	return err;
}

@@ -228,7 +227,7 @@ static unsigned int __cbc_decrypt(struct
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	struct blkcipher_walk walk;
	int err;

@@ -237,12 +236,11 @@ static int cbc_decrypt(struct blkcipher_
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
+		cast5_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
-
-	cast5_fpu_end(fpu_enabled);
	return err;
}

@@ -312,7 +310,7 @@ static unsigned int __ctr_crypt(struct b
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	struct blkcipher_walk walk;
	int err;

@@ -321,13 +319,12 @@ static int ctr_crypt(struct blkcipher_de
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
+		cast5_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

-	cast5_fpu_end(fpu_enabled);
-
	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = 128 / 8;
	unsigned int nbytes, i, func_bytes;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	int err;

	err = blkcipher_walk_virt(desc, walk);
@@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
+					     desc, false, nbytes);

		for (i = 0; i < gctx->num_funcs; i++) {
			func_bytes = bsize * gctx->funcs[i].num_blocks;
@@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const
	}

done:
+		glue_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

-	glue_fpu_end(fpu_enabled);
	return err;
}

@@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct
			    struct scatterlist *src, unsigned int nbytes)
{
	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	struct blkcipher_walk walk;
	int err;

@@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
+					     desc, false, nbytes);
		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
+		glue_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

-	glue_fpu_end(fpu_enabled);
	return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
@@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct c
			  struct scatterlist *src, unsigned int nbytes)
{
	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	struct blkcipher_walk walk;
	int err;

@@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct c

	while ((nbytes = walk.nbytes) >= bsize) {
		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
+					     desc, false, nbytes);
		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
+		glue_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

-	glue_fpu_end(fpu_enabled);
-
	if (walk.nbytes) {
		glue_ctr_crypt_final_128bit(
			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
@@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct c
			  void *tweak_ctx, void *crypt_ctx)
{
	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
	struct blkcipher_walk walk;
	int err;

@@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct c

	/* set minimum length to bsize, for tweak_fn */
	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-				     desc, fpu_enabled,
+				     desc, false,
				     nbytes < bsize ? bsize : nbytes);
-
	/* calculate first value of T */
	tweak_fn(tweak_ctx, walk.iv, walk.iv);
+	glue_fpu_end(fpu_enabled);

	while (nbytes) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, false, nbytes);
		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);

+		glue_fpu_end(fpu_enabled);
		err = blkcipher_walk_done(desc, &walk, nbytes);
		nbytes = walk.nbytes;
	}
-
-	glue_fpu_end(fpu_enabled);
-
	return err;
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
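The fix above follows one general shape: hold the FPU (and with it the
non-preemptible region) only around one bounded chunk of work, and drop it
before any call that may sleep. A minimal sketch of that shape, with the
hypothetical helpers fpu_begin()/fpu_end()/do_one_chunk() standing in for
the real cast5/glue functions (which take more arguments):

	bool fpu_enabled;

	while ((nbytes = walk.nbytes)) {
		/* enter the atomic region for one bounded chunk only */
		fpu_enabled = fpu_begin(false, nbytes);
		nbytes = do_one_chunk(desc, &walk);
		/* leave the atomic region before ... */
		fpu_end(fpu_enabled);
		/* ... this call, which may sleep / migrate_enable() */
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}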
@@ -1,26 +0,0 @@
Subject: debugobjects: Make RT aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:41:35 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Avoid filling the pool / allocating memory with irqs off.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -309,7 +309,10 @@ static void
	struct debug_obj *obj;
	unsigned long flags;

-	fill_pool();
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (preempt_count() == 0 && !irqs_disabled())
+#endif
+		fill_pool();

	db = get_bucket((unsigned long) addr);

@@ -1,27 +0,0 @@
Subject: dm: Make rt aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 14 Nov 2011 23:06:09 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Use the BUG_ON_NONRT variant for the irqs_disabled() checks. RT has
interrupts legitimately enabled here as we can't deadlock against the
irq thread due to the "sleeping spinlocks" conversion.

Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/md/dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2143,7 +2143,7 @@ static void dm_request_fn(struct request
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
-		BUG_ON(!irqs_disabled());
+		BUG_ON_NONRT(!irqs_disabled());
	}

	goto out;
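BUG_ON_NONRT() (and the WARN_ON_NONRT() used by the block patch further
down) checks the condition only on non-RT kernels. A plausible definition,
assuming the usual RT config symbol; this is a sketch, not a verbatim quote
from the RT tree:

#ifdef CONFIG_PREEMPT_RT_BASE
# define BUG_ON_NONRT(c)	do { } while (0)	/* check compiles away on RT */
# define WARN_ON_NONRT(c)	do { } while (0)
#else
# define BUG_ON_NONRT(c)	BUG_ON(c)
# define WARN_ON_NONRT(c)	WARN_ON(c)
#endif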
@@ -1,26 +0,0 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:24 -0500
Subject: drivers/net: Use disable_irq_nosync() in 8139too
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Use disable_irq_nosync() instead of disable_irq() as this might be
called in atomic context with netpoll.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 drivers/net/ethernet/realtek/8139too.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2229,7 +2229,7 @@ static void rtl8139_poll_controller(stru
	struct rtl8139_private *tp = netdev_priv(dev);
	const int irq = tp->pci_dev->irq;

-	disable_irq(irq);
+	disable_irq_nosync(irq);
	rtl8139_interrupt(irq, dev);
	enable_irq(irq);
}
@@ -1,127 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 20 Jun 2009 11:36:54 +0200
Subject: drivers/net: fix livelock issues
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Preempt-RT runs into a livelock issue with the NETDEV_TX_LOCKED micro
optimization. The reason is that the softirq thread is rescheduling
itself on that return value. Depending on priorities it starts to
monopolize the CPU and livelock on UP systems.

Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c      | 6 +-----
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c      | 3 +--
 drivers/net/ethernet/chelsio/cxgb/sge.c              | 3 +--
 drivers/net/ethernet/neterion/s2io.c                 | 7 +------
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 6 ++----
 drivers/net/ethernet/tehuti/tehuti.c                 | 9 ++-------
 drivers/net/rionet.c                                 | 6 +-----
 7 files changed, 9 insertions(+), 31 deletions(-)

--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2213,11 +2213,7 @@ static netdev_tx_t atl1c_xmit_frame(stru
	}

	tpd_req = atl1c_cal_tpd_req(skb);
-	if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
-		if (netif_msg_pktdata(adapter))
-			dev_info(&adapter->pdev->dev, "tx locked\n");
-		return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&adapter->tx_lock, flags);

	if (atl1c_tpd_avail(adapter, type) < tpd_req) {
		/* no enough descriptor, just stop queue */
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1880,8 +1880,7 @@ static netdev_tx_t atl1e_xmit_frame(stru
		return NETDEV_TX_OK;
	}
	tpd_req = atl1e_cal_tdp_req(skb);
-	if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
-		return NETDEV_TX_LOCKED;
+	spin_lock_irqsave(&adapter->tx_lock, flags);

	if (atl1e_tpd_avail(adapter) < tpd_req) {
		/* no enough descriptor, just stop queue */
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb
	struct cmdQ *q = &sge->cmdQ[qid];
	unsigned int credits, pidx, genbit, count, use_sched_skb = 0;

-	if (!spin_trylock(&q->lock))
-		return NETDEV_TX_LOCKED;
+	spin_lock(&q->lock);

	reclaim_completed_tx(sge, q);

--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -4084,12 +4084,7 @@ static netdev_tx_t s2io_xmit(struct sk_b
			[skb->priority & (MAX_TX_FIFOS - 1)];
	fifo = &mac_control->fifos[queue];

-	if (do_spin_lock)
-		spin_lock_irqsave(&fifo->tx_lock, flags);
-	else {
-		if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
-			return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&fifo->tx_lock, flags);

	if (sp->config.multiq) {
		if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2137,10 +2137,8 @@ static int pch_gbe_xmit_frame(struct sk_
	struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
	unsigned long flags;

-	if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
-		/* Collision - tell upper layer to requeue */
-		return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&tx_ring->tx_lock, flags);
+
	if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -1629,13 +1629,8 @@ static netdev_tx_t bdx_tx_transmit(struc
	unsigned long flags;

	ENTER;
-	local_irq_save(flags);
-	if (!spin_trylock(&priv->tx_lock)) {
-		local_irq_restore(flags);
-		DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
-		    BDX_DRV_NAME, ndev->name);
-		return NETDEV_TX_LOCKED;
-	}
+
+	spin_lock_irqsave(&priv->tx_lock, flags);

	/* build tx descriptor */
	BDX_ASSERT(f->m.wptr >= f->m.memsz);	/* started with valid wptr */
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_b
	unsigned long flags;
	int add_num = 1;

-	local_irq_save(flags);
-	if (!spin_trylock(&rnet->tx_lock)) {
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&rnet->tx_lock, flags);

	if (is_multicast_ether_addr(eth->h_dest))
		add_num = nets[rnet->mport->id].nact;
@@ -1,49 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 3 Jul 2009 08:30:00 -0500
Subject: drivers/net: vortex fix locking issues
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Argh, cut and paste wasn't enough...

Use this patch instead. It needs an irq disable. But, believe it or not,
on SMP this is actually better. If the irq is shared (as it is in Mark's
case), we don't stop the irq of other devices from being handled on
another CPU (unfortunately for Mark, he pinned all interrupts to one CPU).

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

 drivers/net/ethernet/3com/3c59x.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -842,9 +842,9 @@ static void poll_vortex(struct net_devic
{
	struct vortex_private *vp = netdev_priv(dev);
	unsigned long flags;
-	local_irq_save(flags);
+	local_irq_save_nort(flags);
	(vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
-	local_irq_restore(flags);
+	local_irq_restore_nort(flags);
}
#endif

@@ -1916,12 +1916,12 @@ static void vortex_tx_timeout(struct net
	 * Block interrupts because vortex_interrupt does a bare spin_lock()
	 */
	unsigned long flags;
-	local_irq_save(flags);
+	local_irq_save_nort(flags);
	if (vp->full_bus_master_tx)
		boomerang_interrupt(dev->irq, dev);
	else
		vortex_interrupt(dev->irq, dev);
-	local_irq_restore(flags);
+	local_irq_restore_nort(flags);
}
}

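The _nort variants disable interrupts only on non-RT kernels; on RT they
merely record the flags, since the sections they guard are serialized by
sleeping spinlocks anyway. A sketch of the assumed definitions (from
memory, not a verbatim quote from the RT tree):

#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	local_save_flags(flags)	/* irqs stay on */
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif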
@@ -1,33 +0,0 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:30 -0500
Subject: drivers: random: Reduce preempt disabled region
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

No need to keep preemption disabled across the whole function.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 drivers/char/random.c | 3 ---
 1 file changed, 3 deletions(-)

--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -776,8 +776,6 @@ static void add_timer_randomness(struct
	} sample;
	long delta, delta2, delta3;

-	preempt_disable();
-
	sample.jiffies = jiffies;
	sample.cycles = random_get_entropy();
	sample.num = num;
@@ -818,7 +816,6 @@ static void add_timer_randomness(struct
	 */
	credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
	}
-	preempt_enable();
}

void add_input_randomness(unsigned int type, unsigned int code,
@@ -1,43 +0,0 @@
Subject: tty/serial/omap: Make the locking RT aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 28 Jul 2011 13:32:57 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The lock is a sleeping lock and local_irq_save() is not the
optimisation we are looking for. Redo it to make it work on -RT and
non-RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/tty/serial/omap-serial.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1282,13 +1282,10 @@ serial_omap_console_write(struct console

	pm_runtime_get_sync(up->dev);

-	local_irq_save(flags);
-	if (up->port.sysrq)
-		locked = 0;
-	else if (oops_in_progress)
-		locked = spin_trylock(&up->port.lock);
+	if (up->port.sysrq || oops_in_progress)
+		locked = spin_trylock_irqsave(&up->port.lock, flags);
	else
-		spin_lock(&up->port.lock);
+		spin_lock_irqsave(&up->port.lock, flags);

	/*
	 * First save the IER then disable the interrupts
@@ -1317,8 +1314,7 @@ serial_omap_console_write(struct console
	pm_runtime_mark_last_busy(up->dev);
	pm_runtime_put_autosuspend(up->dev);
	if (locked)
-		spin_unlock(&up->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&up->port.lock, flags);
}

static int __init
@@ -1,48 +0,0 @@
Subject: tty/serial/pl011: Make the locking work on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 08 Jan 2013 21:36:51 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The lock is a sleeping lock and local_irq_save() is not the optimisation
we are looking for. Redo it to make it work on -RT and non-RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/tty/serial/amba-pl011.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2000,13 +2000,19 @@ pl011_console_write(struct console *co,

	clk_enable(uap->clk);

-	local_irq_save(flags);
+	/*
+	 * local_irq_save(flags);
+	 *
+	 * This local_irq_save() is nonsense. If we come in via sysrq
+	 * handling then interrupts are already disabled. Aside of
+	 * that the port.sysrq check is racy on SMP regardless.
+	 */
	if (uap->port.sysrq)
		locked = 0;
	else if (oops_in_progress)
-		locked = spin_trylock(&uap->port.lock);
+		locked = spin_trylock_irqsave(&uap->port.lock, flags);
	else
-		spin_lock(&uap->port.lock);
+		spin_lock_irqsave(&uap->port.lock, flags);

	/*
	 * First save the CR then disable the interrupts
@@ -2028,8 +2034,7 @@ pl011_console_write(struct console *co,
	writew(old_cr, uap->port.membase + UART011_CR);

	if (locked)
-		spin_unlock(&uap->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&uap->port.lock, flags);

	clk_disable(uap->clk);
}
@@ -1,59 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 25 Apr 2013 18:12:52 +0200
Subject: drm/i915: drop trace_i915_gem_ring_dispatch on rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This tracepoint is responsible for:

|[<814cc358>] __schedule_bug+0x4d/0x59
|[<814d24cc>] __schedule+0x88c/0x930
|[<814d3b90>] ? _raw_spin_unlock_irqrestore+0x40/0x50
|[<814d3b95>] ? _raw_spin_unlock_irqrestore+0x45/0x50
|[<810b57b5>] ? task_blocks_on_rt_mutex+0x1f5/0x250
|[<814d27d9>] schedule+0x29/0x70
|[<814d3423>] rt_spin_lock_slowlock+0x15b/0x278
|[<814d3786>] rt_spin_lock+0x26/0x30
|[<a00dced9>] gen6_gt_force_wake_get+0x29/0x60 [i915]
|[<a00e183f>] gen6_ring_get_irq+0x5f/0x100 [i915]
|[<a00b2a33>] ftrace_raw_event_i915_gem_ring_dispatch+0xe3/0x100 [i915]
|[<a00ac1b3>] i915_gem_do_execbuffer.isra.13+0xbd3/0x1430 [i915]
|[<810f8943>] ? trace_buffer_unlock_commit+0x43/0x60
|[<8113e8d2>] ? ftrace_raw_event_kmem_alloc+0xd2/0x180
|[<8101d063>] ? native_sched_clock+0x13/0x80
|[<a00acf29>] i915_gem_execbuffer2+0x99/0x280 [i915]
|[<a00114a3>] drm_ioctl+0x4c3/0x570 [drm]
|[<8101d0d9>] ? sched_clock+0x9/0x10
|[<a00ace90>] ? i915_gem_execbuffer+0x480/0x480 [i915]
|[<810f1c18>] ? rb_commit+0x68/0xa0
|[<810f1c6c>] ? ring_buffer_unlock_commit+0x1c/0xa0
|[<81197467>] do_vfs_ioctl+0x97/0x540
|[<81021318>] ? ftrace_raw_event_sys_enter+0xd8/0x130
|[<811979a1>] sys_ioctl+0x91/0xb0
|[<814db931>] tracesys+0xe1/0xe6

Chris Wilson does not like moving i915_trace_irq_get() out of the macro:

|No. This enables the IRQ, as well as making a number of
|very expensively serialised read, unconditionally.

so it is gone now on RT.

Reported-by: Joakim Hernberg <jbh@alchemy.lu>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1339,7 +1339,9 @@ i915_gem_ringbuffer_submission(struct dr
		return ret;
	}

+#ifndef CONFIG_PREEMPT_RT_BASE
	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
+#endif

	i915_gem_execbuffer_move_to_active(vmas, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
@@ -1,31 +0,0 @@
Subject: fs/epoll: Do not disable preemption on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 08 Jul 2011 16:35:35 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

ep_call_nested() takes a sleeping lock so we can't disable preemption.
The light version is enough since ep_call_nested() doesn't mind being
invoked twice on the same CPU.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/eventpoll.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -505,12 +505,12 @@ static int ep_poll_wakeup_proc(void *pri
 */
static void ep_poll_safewake(wait_queue_head_t *wq)
{
-	int this_cpu = get_cpu();
+	int this_cpu = get_cpu_light();

	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);

-	put_cpu();
+	put_cpu_light();
}

static void ep_remove_wait_queue(struct eppoll_entry *pwq)
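get_cpu_light()/put_cpu_light() pin the task to its current CPU without
disabling preemption, which is exactly what RT needs here. Assumed
definitions, analogous to get_cpu()/put_cpu() (an illustration, not a
quote from the RT tree):

#ifdef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()	({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()	migrate_enable()
#else
# define get_cpu_light()	get_cpu()	/* disables preemption */
# define put_cpu_light()	put_cpu()
#endif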
@@ -1,102 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: x86: Do not disable preemption in int3 on 32bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Preemption must be disabled before enabling interrupts in do_trap
on x86_64 because the stack in use for int3 and debug is a per CPU
stack set by the IST. But 32bit does not have an IST and the stack
still belongs to the current task, so there is no problem in scheduling
out the task.

Keep preemption enabled on X86_32 when enabling interrupts for
do_trap().

The name of the function is changed from preempt_conditional_sti/cli()
to conditional_sti/cli_ist(), to annotate that this function is used
when the stack is on the IST.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 arch/x86/kernel/traps.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -88,9 +88,21 @@ static inline void conditional_sti(struc
		local_irq_enable();
}

-static inline void preempt_conditional_sti(struct pt_regs *regs)
+static inline void conditional_sti_ist(struct pt_regs *regs)
{
+#ifdef CONFIG_X86_64
+	/*
+	 * X86_64 uses a per CPU stack on the IST for certain traps
+	 * like int3. The task can not be preempted when using one
+	 * of these stacks, thus preemption must be disabled, otherwise
+	 * the stack can be corrupted if the task is scheduled out,
+	 * and another task comes in and uses this stack.
+	 *
+	 * On x86_32 the task keeps its own stack and it is OK if the
+	 * task schedules out.
+	 */
	preempt_count_inc();
+#endif
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}
@@ -101,11 +113,13 @@ static inline void conditional_cli(struc
		local_irq_disable();
}

-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void conditional_cli_ist(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_disable();
+#ifdef CONFIG_X86_64
	preempt_count_dec();
+#endif
}

enum ctx_state ist_enter(struct pt_regs *regs)
@@ -536,9 +550,9 @@ dotraplinkage void notrace do_int3(struc
	 * as we may switch to the interrupt stack.
	 */
	debug_stack_usage_inc();
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
	debug_stack_usage_dec();
exit:
	ist_exit(regs, prev_state);
@@ -668,12 +682,12 @@ dotraplinkage void do_debug(struct pt_re
	debug_stack_usage_inc();

	/* It's safe to allow irq's after DR6 has been saved */
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);

	if (v8086_mode(regs)) {
		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
				 X86_TRAP_DB);
-		preempt_conditional_cli(regs);
+		conditional_cli_ist(regs);
		debug_stack_usage_dec();
		goto exit;
	}
@@ -693,7 +707,7 @@ dotraplinkage void do_debug(struct pt_re
	si_code = get_si_code(tsk->thread.debugreg6);
	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
		send_sigtrap(tsk, regs, error_code, si_code);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
	debug_stack_usage_dec();

exit:
@@ -1,107 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 16 Feb 2015 18:49:10 +0100
Subject: fs/aio: simple simple work
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
|in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
|2 locks held by rcuos/2/26:
| #0: (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
| #1: (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
|Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
|Call Trace:
| [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
| [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
| [<ffffffff81589304>] rt_spin_lock+0x24/0x70
| [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
| [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
| [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
| [<ffffffff8106e046>] kthread+0xd6/0xf0
| [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0

Replace this with the preempt_disable()-friendly swork.

Reported-By: Mike Galbraith <umgwanakikbuti@gmail.com>
Suggested-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/aio.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,6 +40,7 @@
 #include <linux/ramfs.h>
 #include <linux/percpu-refcount.h>
 #include <linux/mount.h>
+#include <linux/work-simple.h>

 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -115,7 +116,7 @@ struct kioctx {
	struct page		**ring_pages;
	long			nr_pages;

-	struct work_struct	free_work;
+	struct swork_event	free_work;

	/*
	 * signals when all in-flight requests are done
@@ -253,6 +254,7 @@ static int __init aio_setup(void)
		.mount		= aio_mount,
		.kill_sb	= kill_anon_super,
	};
+	BUG_ON(swork_get());
	aio_mnt = kern_mount(&aio_fs);
	if (IS_ERR(aio_mnt))
		panic("Failed to create aio fs mount.");
@@ -559,9 +561,9 @@ static int kiocb_cancel(struct aio_kiocb
	return cancel(&kiocb->common);
}

-static void free_ioctx(struct work_struct *work)
+static void free_ioctx(struct swork_event *sev)
{
-	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+	struct kioctx *ctx = container_of(sev, struct kioctx, free_work);

	pr_debug("freeing %p\n", ctx);

@@ -580,8 +582,8 @@ static void free_ioctx_reqs(struct percp
	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
		complete(&ctx->rq_wait->comp);

-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
+	INIT_SWORK(&ctx->free_work, free_ioctx);
+	swork_queue(&ctx->free_work);
}

/*
@@ -589,9 +591,9 @@ static void free_ioctx_reqs(struct percp
 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
 * now it's safe to cancel any that need to be.
 */
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx_users_work(struct swork_event *sev)
{
-	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+	struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
	struct aio_kiocb *req;

	spin_lock_irq(&ctx->ctx_lock);
@@ -610,6 +612,14 @@ static void free_ioctx_users(struct perc
	percpu_ref_put(&ctx->reqs);
}

+static void free_ioctx_users(struct percpu_ref *ref)
+{
+	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+	INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
+	swork_queue(&ctx->free_work);
+}
+
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
{
	unsigned i, new_nr;
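For reference, a minimal usage sketch of the simple-work API as the diff
above uses it. The signatures are inferred from the call sites (INIT_SWORK(),
swork_queue(), and an int-returning swork_get()), so treat this as an
illustration, not authoritative documentation:

#include <linux/work-simple.h>

static struct swork_event my_event;

/* Runs in the swork kernel thread, i.e. fully preemptible context. */
static void my_handler(struct swork_event *sev)
{
	pr_debug("swork handler ran\n");
}

static int __init my_init(void)
{
	int err = swork_get();	/* bring up the worker thread, once per user */

	if (err)
		return err;
	INIT_SWORK(&my_event, my_handler);
	swork_queue(&my_event);
	return 0;
}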
@@ -1,23 +0,0 @@
Subject: block: Turn off warning which is bogus on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Jun 2011 17:05:09 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

On -RT the context is always with IRQs enabled. Ignore this warning on -RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -194,7 +194,7 @@ EXPORT_SYMBOL(blk_delay_queue);
 **/
void blk_start_queue(struct request_queue *q)
{
-	WARN_ON(!irqs_disabled());
+	WARN_ON_NONRT(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	__blk_run_queue(q);
@@ -1,86 +0,0 @@
Subject: fs: dcache: Use cpu_chill() in trylock loops
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 07 Mar 2012 21:00:34 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Retry loops on RT might loop forever when the modifying side was
preempted. Use cpu_chill() instead of cpu_relax() to let the system
make progress.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 fs/autofs4/autofs_i.h | 1 +
 fs/autofs4/expire.c   | 2 +-
 fs/dcache.c           | 5 +++--
 fs/namespace.c        | 3 ++-
 4 files changed, 7 insertions(+), 4 deletions(-)

--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/delay.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>

--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -150,7 +150,7 @@ static struct dentry *get_next_positive_
			parent = p->d_parent;
			if (!spin_trylock(&parent->d_lock)) {
				spin_unlock(&p->d_lock);
-				cpu_relax();
+				cpu_chill();
				goto relock;
			}
			spin_unlock(&p->d_lock);
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/fsnotify.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
@@ -589,7 +590,7 @@ static struct dentry *dentry_kill(struct

failed:
	spin_unlock(&dentry->d_lock);
-	cpu_relax();
+	cpu_chill();
	return dentry; /* try again with same dentry */
}

@@ -2392,7 +2393,7 @@ void d_delete(struct dentry * dentry)
	if (dentry->d_lockref.count == 1) {
		if (!spin_trylock(&inode->i_lock)) {
			spin_unlock(&dentry->d_lock);
-			cpu_relax();
+			cpu_chill();
			goto again;
		}
		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,6 +14,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/namei.h>
+#include <linux/delay.h>
 #include <linux/security.h>
 #include <linux/idr.h>
 #include <linux/init.h>		/* init_rootfs */
@@ -355,7 +356,7 @@ int __mnt_want_write(struct vfsmount *m)
	smp_mb();
	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
		preempt_enable();
-		cpu_relax();
+		cpu_chill();
		preempt_disable();
	}
	/*
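cpu_chill() exists so that a trylock retry loop actually sleeps on RT,
giving a preempted lock holder the chance to run; on !RT it falls back to
cpu_relax(). One plausible minimal shape (this RT series may implement the
RT side with an hrtimer-based sleep rather than msleep()):

#ifdef CONFIG_PREEMPT_RT_FULL
static inline void cpu_chill(void)
{
	msleep(1);	/* really sleep, so the lock holder can make progress */
}
#else
# define cpu_chill()	cpu_relax()
#endif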
@@ -1,30 +0,0 @@
From: Mike Galbraith <mgalbraith@suse.de>
Date: Wed, 11 Jul 2012 22:05:20 +0000
Subject: fs, jbd: pull your plug when waiting for space
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

With an -rt kernel, and a heavy sync IO load, tasks can jam
up on journal locks without unplugging, which can lead to
terminal IO starvation. Unplug and schedule when waiting for space.

Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Theodore Tso <tytso@mit.edu>
Link: http://lkml.kernel.org/r/1341812414.7370.73.camel@marge.simpson.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 fs/jbd/checkpoint.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -129,6 +129,8 @@ void __log_wait_for_space(journal_t *jou
	if (journal->j_flags & JFS_ABORT)
		return;
	spin_unlock(&journal->j_state_lock);
+	if (current->plug)
+		io_schedule();
	mutex_lock(&journal->j_checkpoint_mutex);

	/*
@@ -1,101 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 10:11:25 +0100
Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

bit_spin_locks break under RT.

Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

--

 include/linux/buffer_head.h | 10 ++++++++++
 include/linux/jbd_common.h  | 24 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -77,6 +77,11 @@ struct buffer_head {
	atomic_t b_count;		/* users using this buffer_head */
 #ifdef CONFIG_PREEMPT_RT_BASE
	spinlock_t b_uptodate_lock;
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
+    defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
+	spinlock_t b_state_lock;
+	spinlock_t b_journal_head_lock;
+#endif
 #endif
};

@@ -108,6 +113,11 @@ static inline void buffer_head_init_lock
{
 #ifdef CONFIG_PREEMPT_RT_BASE
	spin_lock_init(&bh->b_uptodate_lock);
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
+    defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
+	spin_lock_init(&bh->b_state_lock);
+	spin_lock_init(&bh->b_journal_head_lock);
+#endif
 #endif
}

--- a/include/linux/jbd_common.h
+++ b/include/linux/jbd_common.h
@@ -15,32 +15,56 @@ static inline struct journal_head *bh2jh

static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
	bit_spin_lock(BH_State, &bh->b_state);
+#else
+	spin_lock(&bh->b_state_lock);
+#endif
}

static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
	return bit_spin_trylock(BH_State, &bh->b_state);
+#else
+	return spin_trylock(&bh->b_state_lock);
+#endif
}

static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
	return bit_spin_is_locked(BH_State, &bh->b_state);
+#else
+	return spin_is_locked(&bh->b_state_lock);
+#endif
}

static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
	bit_spin_unlock(BH_State, &bh->b_state);
+#else
+	spin_unlock(&bh->b_state_lock);
+#endif
}

static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
	bit_spin_lock(BH_JournalHead, &bh->b_state);
+#else
+	spin_lock(&bh->b_journal_head_lock);
+#endif
}

static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
	bit_spin_unlock(BH_JournalHead, &bh->b_state);
+#else
+	spin_unlock(&bh->b_journal_head_lock);
+#endif
}

#endif
@@ -1,32 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 17 Feb 2014 17:30:03 +0100
Subject: fs: jbd2: pull your plug when waiting for space
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Two cps in parallel managed to stall the ext4 fs. It seems that the
journal code is either waiting for locks or sleeping waiting for
something to happen. This seems similar to what Mike observed on ext3,
here is his description:

|With an -rt kernel, and a heavy sync IO load, tasks can jam
|up on journal locks without unplugging, which can lead to
|terminal IO starvation. Unplug and schedule when waiting
|for space.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/jbd2/checkpoint.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t
	nblocks = jbd2_space_needed(journal);
	while (jbd2_log_space_left(journal) < nblocks) {
		write_unlock(&journal->j_state_lock);
+		if (current->plug)
+			io_schedule();
		mutex_lock(&journal->j_checkpoint_mutex);

		/*
@@ -1,31 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 19 Jul 2009 08:44:27 -0500
Subject: fs: namespace preemption fix
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

On RT we cannot loop with preemption disabled here as
mnt_make_readonly() might have been preempted. We can safely enable
preemption while waiting for MNT_WRITE_HOLD to be cleared. Safe on !RT
as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 fs/namespace.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -353,8 +353,11 @@ int __mnt_want_write(struct vfsmount *m)
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
-	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
+	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+		preempt_enable();
		cpu_relax();
+		preempt_disable();
+	}
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
@@ -1,60 +0,0 @@
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 3 Jul 2009 08:44:12 -0500
Subject: fs: ntfs: disable interrupt only on !RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

On Sat, 2007-10-27 at 11:44 +0200, Ingo Molnar wrote:
> * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> > > [10138.175796]  [<c0105de3>] show_trace+0x12/0x14
> > > [10138.180291]  [<c0105dfb>] dump_stack+0x16/0x18
> > > [10138.184769]  [<c011609f>] native_smp_call_function_mask+0x138/0x13d
> > > [10138.191117]  [<c0117606>] smp_call_function+0x1e/0x24
> > > [10138.196210]  [<c012f85c>] on_each_cpu+0x25/0x50
> > > [10138.200807]  [<c0115c74>] flush_tlb_all+0x1e/0x20
> > > [10138.205553]  [<c016caaf>] kmap_high+0x1b6/0x417
> > > [10138.210118]  [<c011ec88>] kmap+0x4d/0x4f
> > > [10138.214102]  [<c026a9d8>] ntfs_end_buffer_async_read+0x228/0x2f9
> > > [10138.220163]  [<c01a0e9e>] end_bio_bh_io_sync+0x26/0x3f
> > > [10138.225352]  [<c01a2b09>] bio_endio+0x42/0x6d
> > > [10138.229769]  [<c02c2a08>] __end_that_request_first+0x115/0x4ac
> > > [10138.235682]  [<c02c2da7>] end_that_request_chunk+0x8/0xa
> > > [10138.241052]  [<c0365943>] ide_end_request+0x55/0x10a
> > > [10138.246058]  [<c036dae3>] ide_dma_intr+0x6f/0xac
> > > [10138.250727]  [<c0366d83>] ide_intr+0x93/0x1e0
> > > [10138.255125]  [<c015afb4>] handle_IRQ_event+0x5c/0xc9
> >
> > Looks like ntfs is kmap()ing from interrupt context. Should be using
> > kmap_atomic instead, I think.
>
> it's not atomic interrupt context but irq thread context - and -rt
> remaps kmap_atomic() to kmap() internally.

Hm. Looking at the change to mm/bounce.c, perhaps I should do this
instead?

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 fs/ntfs/aops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -143,13 +143,13 @@ static void ntfs_end_buffer_async_read(s
		recs = PAGE_CACHE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
-		local_irq_save(flags);
+		local_irq_save_nort(flags);
		kaddr = kmap_atomic(page);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
					i * rec_size), rec_size);
		kunmap_atomic(kaddr);
-		local_irq_restore(flags);
+		local_irq_restore_nort(flags);
		flush_dcache_page(page);
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
@@ -1,162 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 09:18:52 +0100
Subject: buffer_head: Replace bh_uptodate_lock for -rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Wrap the bit_spin_lock calls into a separate inline and add the RT
replacements with a real spinlock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/buffer.c                 | 21 +++++++--------------
 fs/ntfs/aops.c              | 10 +++-------
 include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 21 deletions(-)

--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -301,8 +301,7 @@ static void end_buffer_async_read(struct
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	flags = bh_uptodate_lock_irqsave(first);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
@@ -315,8 +314,7 @@ static void end_buffer_async_read(struct
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	bh_uptodate_unlock_irqrestore(first, flags);

	/*
	 * If none of the buffers had errors and they are all
@@ -328,9 +326,7 @@ static void end_buffer_async_read(struct
	return;

still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	bh_uptodate_unlock_irqrestore(first, flags);
}

/*
@@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffe
	}

	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	flags = bh_uptodate_lock_irqsave(first);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
@@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffe
		}
		tmp = tmp->b_this_page;
	}
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	bh_uptodate_unlock_irqrestore(first, flags);
	end_page_writeback(page);
	return;

still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	bh_uptodate_unlock_irqrestore(first, flags);
}
EXPORT_SYMBOL(end_buffer_async_write);

@@ -3325,6 +3317,7 @@ struct buffer_head *alloc_buffer_head(gf
	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
	if (ret) {
		INIT_LIST_HEAD(&ret->b_assoc_buffers);
+		buffer_head_init_locks(ret);
		preempt_disable();
		__this_cpu_inc(bh_accounting.nr);
		recalc_bh_state();
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(s
				"0x%llx.", (unsigned long long)bh->b_blocknr);
	}
	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	flags = bh_uptodate_lock_irqsave(first);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
@@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(s
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
+	bh_uptodate_unlock_irqrestore(first, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
@@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(s
	unlock_page(page);
	return;
still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	bh_uptodate_unlock_irqrestore(first, flags);
}

/**
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -75,8 +75,42 @@ struct buffer_head {
	struct address_space *b_assoc_map;	/* mapping this buffer is
						   associated with */
	atomic_t b_count;		/* users using this buffer_head */
+#ifdef CONFIG_PREEMPT_RT_BASE
+	spinlock_t b_uptodate_lock;
+#endif
};

+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
+{
+	unsigned long flags;
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
+#else
+	spin_lock_irqsave(&bh->b_uptodate_lock, flags);
+#endif
+	return flags;
+}
+
+static inline void
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
+{
+#ifndef CONFIG_PREEMPT_RT_BASE
+	bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
+	local_irq_restore(flags);
+#else
+	spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
+#endif
+}
+
+static inline void buffer_head_init_locks(struct buffer_head *bh)
+{
+#ifdef CONFIG_PREEMPT_RT_BASE
+	spin_lock_init(&bh->b_uptodate_lock);
+#endif
+}
+
/*
 * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
 * and buffer_foo() functions.
@@ -1,74 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:56:42 +0200
Subject: trace: Add migrate-disabled counter to tracing output
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/ftrace_event.h | 2 ++
 kernel/trace/trace.c         | 9 ++++++---
 kernel/trace/trace_events.c  | 2 ++
 kernel/trace/trace_output.c  | 5 +++++
 4 files changed, 15 insertions(+), 3 deletions(-)

--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -66,6 +66,8 @@ struct trace_entry {
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
+	unsigned short		migrate_disable;
+	unsigned short		padding;
};

#define FTRACE_MAX_EVENT	\
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1641,6 +1641,8 @@ tracing_generic_entry_update(struct trac
		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
+
+	entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);

@@ -2563,9 +2565,10 @@ static void print_lat_help_header(struct
		"# | / _----=> need-resched \n"
		"# || / _---=> hardirq/softirq \n"
		"# ||| / _--=> preempt-depth \n"
-		"# |||| / delay \n"
-		"# cmd pid ||||| time | caller \n"
-		"# \\ / ||||| \\ | / \n");
+		"# |||| / _--=> migrate-disable\n"
+		"# ||||| / delay \n"
+		"# cmd pid |||||| time | caller \n"
+		"# \\ / ||||| \\ | / \n");
}

static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -162,6 +162,8 @@ static int trace_define_common_fields(vo
	__common_field(unsigned char, flags);
	__common_field(unsigned char, preempt_count);
	__common_field(int, pid);
+	__common_field(unsigned short, migrate_disable);
+	__common_field(unsigned short, padding);

	return ret;
}
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -472,6 +472,11 @@ int trace_print_lat_fmt(struct trace_seq
	else
		trace_seq_putc(s, '.');

+	if (entry->migrate_disable)
+		trace_seq_printf(s, "%x", entry->migrate_disable);
+	else
+		trace_seq_putc(s, '.');
+
	return !trace_seq_has_overflowed(s);
}
@@ -1,224 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Feb 2015 20:17:31 +0100
Subject: futex: avoid double wake up in PI futex wait / wake on -RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The boosted priority is reverted after the unlock but before the
futex_hash_bucket (hb) has been accessed. The result is that we boost the
task, deboost the task, boost again for the hb lock, deboost again.
A sched trace of this scenario looks like the following:

| med_prio-93 sched_wakeup: comm=high_prio pid=92 prio=9 success=1 target_cpu=000
| med_prio-93 sched_switch: prev_comm=med_prio prev_pid=93 prev_prio=29 prev_state=R ==> next_comm=high_prio next_pid=92 next_prio=9
|high_prio-92 sched_pi_setprio: comm=low_prio pid=91 oldprio=120 newprio=9
|high_prio-92 sched_switch: prev_comm=high_prio prev_pid=92 prev_prio=9 prev_state=S ==> next_comm=low_prio next_pid=91 next_prio=9
| low_prio-91 sched_wakeup: comm=high_prio pid=92 prio=9 success=1 target_cpu=000
| low_prio-91 sched_pi_setprio: comm=low_prio pid=91 oldprio=9 newprio=120
| low_prio-91 sched_switch: prev_comm=low_prio prev_pid=91 prev_prio=120 prev_state=R+ ==> next_comm=high_prio next_pid=92 next_prio=9
|high_prio-92 sched_pi_setprio: comm=low_prio pid=91 oldprio=120 newprio=9
|high_prio-92 sched_switch: prev_comm=high_prio prev_pid=92 prev_prio=9 prev_state=D ==> next_comm=low_prio next_pid=91 next_prio=9
| low_prio-91 sched_wakeup: comm=high_prio pid=92 prio=9 success=1 target_cpu=000
| low_prio-91 sched_pi_setprio: comm=low_prio pid=91 oldprio=9 newprio=120
| low_prio-91 sched_switch: prev_comm=low_prio prev_pid=91 prev_prio=120 prev_state=R+ ==> next_comm=high_prio next_pid=92 next_prio=9

We see four sched_pi_setprio() invocations but ideally two would be enough.
The patch tries to avoid the double wake up by deferring the deboost until
the hb lock has been released. The same test case:

| med_prio-21 sched_wakeup: comm=high_prio pid=20 prio=9 success=1 target_cpu=000
| med_prio-21 sched_switch: prev_comm=med_prio prev_pid=21 prev_prio=29 prev_state=R ==> next_comm=high_prio next_pid=20 next_prio=9
|high_prio-20 sched_pi_setprio: comm=low_prio pid=19 oldprio=120 newprio=9
|high_prio-20 sched_switch: prev_comm=high_prio prev_pid=20 prev_prio=9 prev_state=S ==> next_comm=low_prio next_pid=19 next_prio=9
| low_prio-19 sched_wakeup: comm=high_prio pid=20 prio=9 success=1 target_cpu=000
| low_prio-19 sched_pi_setprio: comm=low_prio pid=19 oldprio=9 newprio=120
| low_prio-19 sched_switch: prev_comm=low_prio prev_pid=19 prev_prio=120 prev_state=R+ ==> next_comm=high_prio next_pid=20 next_prio=9

only two sched_pi_setprio() invocations, as one would expect and see
without -RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/futex.c | 32 +++++++++++++++++++++++++++++---
kernel/locking/rtmutex.c | 40 +++++++++++++++++++++++++++++-----------
kernel/locking/rtmutex_common.h | 4 ++++
3 files changed, 62 insertions(+), 14 deletions(-)

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1117,11 +1117,13 @@ static void mark_wake_futex(struct wake_
q->lock_ptr = NULL;
}

-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+ struct futex_hash_bucket *hb)
{
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state;
u32 uninitialized_var(curval), newval;
+ bool deboost;
int ret = 0;

if (!pi_state)
@@ -1173,7 +1175,17 @@ static int wake_futex_pi(u32 __user *uad
raw_spin_unlock_irq(&new_owner->pi_lock);

raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
- rt_mutex_unlock(&pi_state->pi_mutex);
+
+ deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex);
+
+ /*
+ * We deboost after dropping hb->lock. That prevents a double
+ * wakeup on RT.
+ */
+ spin_unlock(&hb->lock);
+
+ if (deboost)
+ rt_mutex_adjust_prio(current);

return 0;
}
@@ -2413,13 +2425,26 @@ static int futex_unlock_pi(u32 __user *u
*/
match = futex_top_waiter(hb, &key);
if (match) {
- ret = wake_futex_pi(uaddr, uval, match);
+ ret = wake_futex_pi(uaddr, uval, match, hb);
+
+ /*
+ * In case of success wake_futex_pi dropped the hash
+ * bucket lock.
+ */
+ if (!ret)
+ goto out_putkey;
+
/*
* The atomic access to the futex value generated a
* pagefault, so retry the user-access and the wakeup:
*/
if (ret == -EFAULT)
goto pi_faulted;
+
+ /*
+ * wake_futex_pi has detected invalid state. Tell user
+ * space.
+ */
goto out_unlock;
}

@@ -2440,6 +2465,7 @@ static int futex_unlock_pi(u32 __user *u

out_unlock:
spin_unlock(&hb->lock);
+out_putkey:
put_futex_key(&key);
return ret;

--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -300,7 +300,7 @@ static void __rt_mutex_adjust_prio(struc
* of task. We do not use the spin_xx_mutex() variants here as we are
* outside of the debug path.)
*/
-static void rt_mutex_adjust_prio(struct task_struct *task)
+void rt_mutex_adjust_prio(struct task_struct *task)
{
unsigned long flags;

@@ -957,8 +957,9 @@ static int task_blocks_on_rt_mutex(struc
/*
* Wake up the next waiter on the lock.
*
- * Remove the top waiter from the current tasks pi waiter list and
- * wake it up.
+ * Remove the top waiter from the current tasks pi waiter list,
+ * wake it up and return whether the current task needs to undo
+ * a potential priority boosting.
*
* Called with lock->wait_lock held.
*/
@@ -1255,7 +1256,7 @@ static inline int rt_mutex_slowtrylock(s
/*
* Slow path to release a rt-mutex:
*/
-static void __sched
+static bool __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
raw_spin_lock(&lock->wait_lock);
@@ -1298,7 +1299,7 @@ rt_mutex_slowunlock(struct rt_mutex *loc
while (!rt_mutex_has_waiters(lock)) {
/* Drops lock->wait_lock ! */
if (unlock_rt_mutex_safe(lock) == true)
- return;
+ return false;
/* Relock the rtmutex and try again */
raw_spin_lock(&lock->wait_lock);
}
@@ -1311,8 +1312,7 @@ rt_mutex_slowunlock(struct rt_mutex *loc

raw_spin_unlock(&lock->wait_lock);

- /* Undo pi boosting if necessary: */
- rt_mutex_adjust_prio(current);
+ return true;
}

/*
@@ -1363,12 +1363,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lo

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock))
+ bool (*slowfn)(struct rt_mutex *lock))
{
- if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
+ if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
- else
- slowfn(lock);
+ } else if (slowfn(lock)) {
+ /* Undo pi boosting if necessary: */
+ rt_mutex_adjust_prio(current);
+ }
}

/**
@@ -1463,6 +1465,22 @@ void __sched rt_mutex_unlock(struct rt_m
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/**
+ * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
+ * @lock: the rt_mutex to be unlocked
+ *
+ * Returns: true/false indicating whether priority adjustment is
+ * required or not.
+ */
+bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
+{
+ if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+ rt_mutex_deadlock_account_unlock(current);
+ return false;
+ }
+ return rt_mutex_slowunlock(lock);
+}
+
+/**
* rt_mutex_destroy - mark a mutex unusable
* @lock: the mutex to be destroyed
*
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -132,6 +132,10 @@ extern int rt_mutex_finish_proxy_lock(st
struct rt_mutex_waiter *waiter);
extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);

+extern bool rt_mutex_futex_unlock(struct rt_mutex *lock);
+
+extern void rt_mutex_adjust_prio(struct task_struct *task);
+
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
#else
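The mechanical core of the patch above is a calling-convention change: the rt_mutex slow unlock no longer deboosts by itself, it only reports whether a deboost is pending, so the futex code can drop hb->lock first. Condensed to the essential ordering (a sketch of the pattern, not the complete kernel code):

	bool deboost;

	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex);
	/*
	 * Drop the outer hb->lock before undoing the PI boost; deboosting
	 * while still holding it is what caused the extra wake/sleep cycle.
	 */
	spin_unlock(&hb->lock);
	if (deboost)
		rt_mutex_adjust_prio(current);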
@@ -1,114 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: futex: Fix bug on when a requeued RT task times out
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Requeue with timeout causes a bug with PREEMPT_RT_FULL.

The bug comes from a timed out condition.

TASK 1 TASK 2
------ ------
futex_wait_requeue_pi()
futex_wait_queue_me()
<timed out>

double_lock_hb();

raw_spin_lock(pi_lock);
if (current->pi_blocked_on) {
} else {
current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
raw_spin_unlock(pi_lock);
spin_lock(hb->lock); <-- blocked!

plist_for_each_entry_safe(this) {
rt_mutex_start_proxy_lock();
task_blocks_on_rt_mutex();
BUG_ON(task->pi_blocked_on)!!!!

The BUG_ON() actually has a check for PI_WAKEUP_INPROGRESS, but the
problem is that, after TASK 1 sets PI_WAKEUP_INPROGRESS, it then tries to
grab the hb->lock, which it fails to do. As the hb->lock is a mutex,
it will block and set the "pi_blocked_on" to the hb->lock.

When TASK 2 goes to requeue it, the check for PI_WAKEUP_INPROGRESS fails
because TASK 1's pi_blocked_on is no longer set to that, but instead,
set to the hb->lock.

The fix:

When calling rt_mutex_start_proxy_lock() a check is made to see
if the proxy task's pi_blocked_on is set. If so, exit out early.
Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies
the proxy task that it is being requeued, and will handle things
appropriately.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/locking/rtmutex.c | 32 +++++++++++++++++++++++++++++++-
kernel/locking/rtmutex_common.h | 1 +
2 files changed, 32 insertions(+), 1 deletion(-)

--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -71,7 +71,8 @@ static void fixup_rt_mutex_waiters(struc

static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
{
- return waiter && waiter != PI_WAKEUP_INPROGRESS;
+ return waiter && waiter != PI_WAKEUP_INPROGRESS &&
+ waiter != PI_REQUEUE_INPROGRESS;
}

/*
@@ -1603,6 +1604,35 @@ int rt_mutex_start_proxy_lock(struct rt_
return 1;
}

+#ifdef CONFIG_PREEMPT_RT_FULL
+ /*
+ * In PREEMPT_RT there's an added race.
+ * If the task, that we are about to requeue, times out,
+ * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
+ * to skip this task. But right after the task sets
+ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
+ * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
+ * This will replace the PI_WAKEUP_INPROGRESS with the actual
+ * lock that it blocks on. We *must not* place this task
+ * on this proxy lock in that case.
+ *
+ * To prevent this race, we first take the task's pi_lock
+ * and check if it has updated its pi_blocked_on. If it has,
+ * we assume that it woke up and we return -EAGAIN.
+ * Otherwise, we set the task's pi_blocked_on to
+ * PI_REQUEUE_INPROGRESS, so that if the task is waking up
+ * it will know that we are in the process of requeuing it.
+ */
+ raw_spin_lock_irq(&task->pi_lock);
+ if (task->pi_blocked_on) {
+ raw_spin_unlock_irq(&task->pi_lock);
+ raw_spin_unlock(&lock->wait_lock);
+ return -EAGAIN;
+ }
+ task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
+ raw_spin_unlock_irq(&task->pi_lock);
+#endif
+
/* We enforce deadlock detection for futexes */
ret = task_blocks_on_rt_mutex(lock, waiter, task,
RT_MUTEX_FULL_CHAINWALK);
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -120,6 +120,7 @@ enum rtmutex_chainwalk {
* PI-futex support (proxy locking functions, etc.):
*/
#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
+#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)

extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
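Stripped of the futex specifics, the fix above is a classic check-and-mark handshake under a shared lock: the requeuer may only claim the task if nobody else (here, the wakeup path) has claimed it first. A generic sketch of that handshake in plain C, with invented names:

#include <stdbool.h>

/*
 * Generic check-and-mark: claim the slot only while it is still empty.
 * The futex fix performs exactly this on task->pi_blocked_on, under the
 * task's pi_lock, with PI_REQUEUE_INPROGRESS as the marker value.
 */
static bool try_claim(void **slot, void *marker)
{
	if (*slot)		/* the wakeup path got here first */
		return false;	/* caller backs out, e.g. with -EAGAIN */
	*slot = marker;		/* announce the requeue in progress */
	return true;
}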
@@ -1,38 +0,0 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:57 -0500
Subject: genirq: Disable irqpoll on -rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Creates long latencies for no value

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
kernel/irq/spurious.c | 8 ++++++++
1 file changed, 8 insertions(+)

--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -444,6 +444,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir

static int __init irqfixup_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
+ return 1;
+#endif
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
@@ -456,6 +460,10 @@ module_param(irqfixup, int, 0644);

static int __init irqpoll_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
+ return 1;
+#endif
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");
@@ -1,145 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 21 Aug 2013 17:48:46 +0200
Subject: genirq: Do not invoke the affinity callback via a workqueue on RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Joe Korty reported that __irq_set_affinity_locked() schedules a
workqueue while holding a rawlock, which results in a might_sleep()
warning.
This patch moves the invocation into process context so that we only
wakeup() a process while holding the lock.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/interrupt.h | 1
kernel/irq/manage.c | 79 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 77 insertions(+), 3 deletions(-)

--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -215,6 +215,7 @@ struct irq_affinity_notify {
unsigned int irq;
struct kref kref;
struct work_struct work;
+ struct list_head list;
void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
void (*release)(struct kref *ref);
};
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -181,6 +181,62 @@ static inline void
irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
#endif

+#ifdef CONFIG_PREEMPT_RT_FULL
+static void _irq_affinity_notify(struct irq_affinity_notify *notify);
+static struct task_struct *set_affinity_helper;
+static LIST_HEAD(affinity_list);
+static DEFINE_RAW_SPINLOCK(affinity_list_lock);
+
+static int set_affinity_thread(void *unused)
+{
+ while (1) {
+ struct irq_affinity_notify *notify;
+ int empty;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ raw_spin_lock_irq(&affinity_list_lock);
+ empty = list_empty(&affinity_list);
+ raw_spin_unlock_irq(&affinity_list_lock);
+
+ if (empty)
+ schedule();
+ if (kthread_should_stop())
+ break;
+ set_current_state(TASK_RUNNING);
+try_next:
+ notify = NULL;
+
+ raw_spin_lock_irq(&affinity_list_lock);
+ if (!list_empty(&affinity_list)) {
+ notify = list_first_entry(&affinity_list,
+ struct irq_affinity_notify, list);
+ list_del_init(&notify->list);
+ }
+ raw_spin_unlock_irq(&affinity_list_lock);
+
+ if (!notify)
+ continue;
+ _irq_affinity_notify(notify);
+ goto try_next;
+ }
+ return 0;
+}
+
+static void init_helper_thread(void)
+{
+ if (set_affinity_helper)
+ return;
+ set_affinity_helper = kthread_run(set_affinity_thread, NULL,
+ "affinity-cb");
+ WARN_ON(IS_ERR(set_affinity_helper));
+}
+#else
+
+static inline void init_helper_thread(void) { }
+
+#endif
+
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -220,7 +276,17 @@ int irq_set_affinity_locked(struct irq_d

if (desc->affinity_notify) {
kref_get(&desc->affinity_notify->kref);
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+ raw_spin_lock(&affinity_list_lock);
+ if (list_empty(&desc->affinity_notify->list))
+ list_add_tail(&affinity_list,
+ &desc->affinity_notify->list);
+ raw_spin_unlock(&affinity_list_lock);
+ wake_up_process(set_affinity_helper);
+#else
schedule_work(&desc->affinity_notify->work);
+#endif
}
irqd_set(data, IRQD_AFFINITY_SET);

@@ -258,10 +324,8 @@ int irq_set_affinity_hint(unsigned int i
}
EXPORT_SYMBOL_GPL(irq_set_affinity_hint);

-static void irq_affinity_notify(struct work_struct *work)
+static void _irq_affinity_notify(struct irq_affinity_notify *notify)
{
- struct irq_affinity_notify *notify =
- container_of(work, struct irq_affinity_notify, work);
struct irq_desc *desc = irq_to_desc(notify->irq);
cpumask_var_t cpumask;
unsigned long flags;
@@ -283,6 +347,13 @@ static void irq_affinity_notify(struct w
kref_put(&notify->kref, notify->release);
}

+static void irq_affinity_notify(struct work_struct *work)
+{
+ struct irq_affinity_notify *notify =
+ container_of(work, struct irq_affinity_notify, work);
+ _irq_affinity_notify(notify);
+}
+
/**
* irq_set_affinity_notifier - control notification of IRQ affinity changes
* @irq: Interrupt for which to enable/disable notification
@@ -312,6 +383,8 @@ irq_set_affinity_notifier(unsigned int i
notify->irq = irq;
kref_init(&notify->kref);
INIT_WORK(&notify->work, irq_affinity_notify);
+ INIT_LIST_HEAD(&notify->list);
+ init_helper_thread();
}

raw_spin_lock_irqsave(&desc->lock, flags);
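The helper-thread construction above is a reusable pattern: producers running under a raw spinlock only enqueue an item and call wake_up_process(), and a dedicated kthread performs the work that may sleep. A condensed sketch of the same consumer loop with hypothetical names (the real patch additionally guards against double-enqueue via list_empty() on the item):

struct my_item {
	struct list_head list;
};

static LIST_HEAD(pending);			/* guarded by pending_lock */
static DEFINE_RAW_SPINLOCK(pending_lock);

static void process_item(struct my_item *item)
{
	/* the part that may sleep lives here, in thread context */
}

static int helper_fn(void *unused)
{
	while (!kthread_should_stop()) {
		struct my_item *item = NULL;

		set_current_state(TASK_INTERRUPTIBLE);
		raw_spin_lock_irq(&pending_lock);
		if (!list_empty(&pending)) {
			item = list_first_entry(&pending, struct my_item, list);
			list_del_init(&item->list);
		}
		raw_spin_unlock_irq(&pending_lock);

		if (item) {
			__set_current_state(TASK_RUNNING);
			process_item(item);
			continue;
		}
		schedule();	/* producers enqueue and wake us */
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}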
@@ -1,49 +0,0 @@
Subject: genirq: Force interrupt thread on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 03 Apr 2011 11:57:29 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Force threaded_irqs and optimize the code (force_irqthreads) in regard
to this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/interrupt.h | 6 +++++-
kernel/irq/manage.c | 2 ++
2 files changed, 7 insertions(+), 1 deletion(-)

--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -377,9 +377,13 @@ extern int irq_set_irqchip_state(unsigne
bool state);

#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifndef CONFIG_PREEMPT_RT_BASE
extern bool force_irqthreads;
+# else
+# define force_irqthreads (true)
+# endif
#else
-#define force_irqthreads (0)
+#define force_irqthreads (false)
#endif

#ifndef __ARCH_SET_SOFTIRQ_PENDING
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -22,6 +22,7 @@
#include "internals.h"

#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifndef CONFIG_PREEMPT_RT_BASE
__read_mostly bool force_irqthreads;

static int __init setup_forced_irqthreads(char *arg)
@@ -30,6 +31,7 @@ static int __init setup_forced_irqthread
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
+# endif
#endif

static void __synchronize_hardirq(struct irq_desc *desc)
@@ -1,317 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Feb 2015 16:01:13 +0100
Subject: gpio: omap: use raw locks for locking
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This patch converts gpio_bank.lock from a spin_lock into a
raw_spin_lock. The call path to access this lock is always under a
raw_spin_lock, for instance
- __setup_irq() holds &desc->lock with irq off
+ __irq_set_trigger()
+ omap_gpio_irq_type()

- handle_level_irq() (runs with irqs off therefore raw locks)
+ mask_ack_irq()
+ omap_gpio_mask_irq()

This fixes the obvious backtrace on -RT. However the locking vs context
is still not right, and this is not limited to -RT:
- omap_gpio_irq_type() is called with IRQ off and has a conditional
call to pm_runtime_get_sync() which may sleep. Either it may happen or
it may not happen but pm_runtime_get_sync() should not be called with
irqs off.

- omap_gpio_debounce() is holding the lock with IRQs off.
+ omap2_set_gpio_debounce()
+ clk_prepare_enable()
+ clk_prepare() this one might sleep.
The number of users of gpiod_set_debounce() / gpio_set_debounce()
looks low but still this is not good.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpio/gpio-omap.c | 78 +++++++++++++++++++++++------------------------
1 file changed, 39 insertions(+), 39 deletions(-)

--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -57,7 +57,7 @@ struct gpio_bank {
u32 saved_datain;
u32 level_mask;
u32 toggle_mask;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct gpio_chip chip;
struct clk *dbck;
u32 mod_usage;
@@ -498,14 +498,14 @@ static int omap_gpio_irq_type(struct irq
(type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)))
return -EINVAL;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
retval = omap_set_gpio_triggering(bank, offset, type);
omap_gpio_init_irq(bank, offset);
if (!omap_gpio_is_input(bank, offset)) {
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
return -EINVAL;
}
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
__irq_set_handler_locked(d->irq, handle_level_irq);
@@ -626,14 +626,14 @@ static int omap_set_gpio_wakeup(struct g
return -EINVAL;
}

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
if (enable)
bank->context.wake_en |= gpio_bit;
else
bank->context.wake_en &= ~gpio_bit;

writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
@@ -668,7 +668,7 @@ static int omap_gpio_request(struct gpio
if (!BANK_USED(bank))
pm_runtime_get_sync(bank->dev);

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
/* Set trigger to none. You need to enable the desired trigger with
* request_irq() or set_irq_type(). Only do this if the IRQ line has
* not already been requested.
@@ -678,7 +678,7 @@ static int omap_gpio_request(struct gpio
omap_enable_gpio_module(bank, offset);
}
bank->mod_usage |= BIT(offset);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
@@ -688,11 +688,11 @@ static void omap_gpio_free(struct gpio_c
struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
unsigned long flags;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
bank->mod_usage &= ~(BIT(offset));
omap_disable_gpio_module(bank, offset);
omap_reset_gpio(bank, offset);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

/*
* If this is the last gpio to be freed in the bank,
@@ -794,9 +794,9 @@ static unsigned int omap_gpio_irq_startu
if (!BANK_USED(bank))
pm_runtime_get_sync(bank->dev);

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
omap_gpio_init_irq(bank, offset);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
omap_gpio_unmask_irq(d);

return 0;
@@ -808,11 +808,11 @@ static void omap_gpio_irq_shutdown(struc
unsigned long flags;
unsigned offset = d->hwirq;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
bank->irq_usage &= ~(BIT(offset));
omap_disable_gpio_module(bank, offset);
omap_reset_gpio(bank, offset);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

/*
* If this is the last IRQ to be freed in the bank,
@@ -836,10 +836,10 @@ static void omap_gpio_mask_irq(struct ir
unsigned offset = d->hwirq;
unsigned long flags;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
omap_set_gpio_irqenable(bank, offset, 0);
omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
}

static void omap_gpio_unmask_irq(struct irq_data *d)
@@ -849,7 +849,7 @@ static void omap_gpio_unmask_irq(struct
u32 trigger = irqd_get_trigger_type(d);
unsigned long flags;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
if (trigger)
omap_set_gpio_triggering(bank, offset, trigger);

@@ -861,7 +861,7 @@ static void omap_gpio_unmask_irq(struct
}

omap_set_gpio_irqenable(bank, offset, 1);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
}

/*---------------------------------------------------------------------*/
@@ -874,9 +874,9 @@ static int omap_mpuio_suspend_noirq(stru
OMAP_MPUIO_GPIO_MASKIT / bank->stride;
unsigned long flags;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
@@ -889,9 +889,9 @@ static int omap_mpuio_resume_noirq(struc
OMAP_MPUIO_GPIO_MASKIT / bank->stride;
unsigned long flags;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
writel_relaxed(bank->context.wake_en, mask_reg);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
@@ -937,9 +937,9 @@ static int omap_gpio_get_direction(struc

bank = container_of(chip, struct gpio_bank, chip);
reg = bank->base + bank->regs->direction;
- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
dir = !!(readl_relaxed(reg) & BIT(offset));
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
return dir;
}

@@ -949,9 +949,9 @@ static int omap_gpio_input(struct gpio_c
unsigned long flags;

bank = container_of(chip, struct gpio_bank, chip);
- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
omap_set_gpio_direction(bank, offset, 1);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
return 0;
}

@@ -973,10 +973,10 @@ static int omap_gpio_output(struct gpio_
unsigned long flags;

bank = container_of(chip, struct gpio_bank, chip);
- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
bank->set_dataout(bank, offset, value);
omap_set_gpio_direction(bank, offset, 0);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
return 0;
}

@@ -988,9 +988,9 @@ static int omap_gpio_debounce(struct gpi

bank = container_of(chip, struct gpio_bank, chip);

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
omap2_set_gpio_debounce(bank, offset, debounce);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
@@ -1001,9 +1001,9 @@ static void omap_gpio_set(struct gpio_ch
unsigned long flags;

bank = container_of(chip, struct gpio_bank, chip);
- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);
bank->set_dataout(bank, offset, value);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
}

/*---------------------------------------------------------------------*/
@@ -1199,7 +1199,7 @@ static int omap_gpio_probe(struct platfo
else
bank->set_dataout = omap_set_gpio_dataout_mask;

- spin_lock_init(&bank->lock);
+ raw_spin_lock_init(&bank->lock);

/* Static mapping, never released */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1246,7 +1246,7 @@ static int omap_gpio_runtime_suspend(str
unsigned long flags;
u32 wake_low, wake_hi;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);

/*
* Only edges can generate a wakeup event to the PRCM.
@@ -1299,7 +1299,7 @@ static int omap_gpio_runtime_suspend(str
bank->get_context_loss_count(bank->dev);

omap_gpio_dbck_disable(bank);
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
@@ -1314,7 +1314,7 @@ static int omap_gpio_runtime_resume(stru
unsigned long flags;
int c;

- spin_lock_irqsave(&bank->lock, flags);
+ raw_spin_lock_irqsave(&bank->lock, flags);

/*
* On the first resume during the probe, the context has not
@@ -1350,14 +1350,14 @@ static int omap_gpio_runtime_resume(stru
if (c != bank->context_loss_count) {
omap_gpio_restore_context(bank);
} else {
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
return 0;
}
}
}

if (!bank->workaround_enabled) {
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
return 0;
}

@@ -1412,7 +1412,7 @@ static int omap_gpio_runtime_resume(stru
}

bank->workaround_enabled = false;
- spin_unlock_irqrestore(&bank->lock, flags);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);

return 0;
}
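The conversion matters because on PREEMPT_RT a plain spinlock_t becomes a sleeping rtmutex, while raw_spinlock_t keeps the classic busy-waiting behavior; a lock taken from a context that already holds a raw lock, or that runs with interrupts hard-disabled, must therefore itself be raw. The rule of thumb, sketched with an invented type name:

struct bank_like {
	raw_spinlock_t lock;	/* raw: stays a busy-wait lock on RT too */
};

static void bank_touch_hw(struct bank_like *bank)
{
	unsigned long flags;

	/*
	 * A plain spinlock_t here could schedule on PREEMPT_RT and so
	 * must not be taken from hard-IRQ paths such as mask_ack_irq();
	 * the raw variant is legal in both configurations.
	 */
	raw_spin_lock_irqsave(&bank->lock, flags);
	/* ... program the GPIO registers ... */
	raw_spin_unlock_irqrestore(&bank->lock, flags);
}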
@@ -1,47 +0,0 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Tue, 24 Mar 2015 08:14:49 +0100
Subject: hotplug: Use set_cpus_allowed_ptr() in sync_unplug_thread()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

do_set_cpus_allowed() is not safe vs ->sched_class change.

crash> bt
PID: 11676 TASK: ffff88026f979da0 CPU: 22 COMMAND: "sync_unplug/22"
#0 [ffff880274d25bc8] machine_kexec at ffffffff8103b41c
#1 [ffff880274d25c18] crash_kexec at ffffffff810d881a
#2 [ffff880274d25cd8] oops_end at ffffffff81525818
#3 [ffff880274d25cf8] do_invalid_op at ffffffff81003096
#4 [ffff880274d25d90] invalid_op at ffffffff8152d3de
[exception RIP: set_cpus_allowed_rt+18]
RIP: ffffffff8109e012 RSP: ffff880274d25e48 RFLAGS: 00010202
RAX: ffffffff8109e000 RBX: ffff88026f979da0 RCX: ffff8802770cb6e8
RDX: 0000000000000000 RSI: ffffffff81add700 RDI: ffff88026f979da0
RBP: ffff880274d25e78 R8: ffffffff816112e0 R9: 0000000000000001
R10: 0000000000000001 R11: 0000000000011940 R12: ffff88026f979da0
R13: ffff8802770cb6d0 R14: ffff880274d25fd8 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#5 [ffff880274d25e60] do_set_cpus_allowed at ffffffff8108e65f
#6 [ffff880274d25e80] sync_unplug_thread at ffffffff81058c08
#7 [ffff880274d25ed8] kthread at ffffffff8107cad6
#8 [ffff880274d25f50] ret_from_fork at ffffffff8152bbbc
crash> task_struct ffff88026f979da0 | grep class
sched_class = 0xffffffff816111e0 <fair_sched_class+64>,

Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -267,7 +267,7 @@ static int sync_unplug_thread(void *data
* we don't want any more work on this CPU.
*/
current->flags &= ~PF_NO_SETAFFINITY;
- do_set_cpus_allowed(current, cpu_present_mask);
+ set_cpus_allowed_ptr(current, cpu_present_mask);
migrate_me();
return 0;
}
@@ -1,205 +0,0 @@
Subject: hotplug: Lightweight get online cpus
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Jun 2011 12:36:06 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

get_online_cpus() is a heavy weight function which involves a global
mutex. migrate_disable() wants a simpler construct which prevents only
a CPU from going down while a task is in a migrate disabled section.

Implement a per cpu lockless mechanism, which serializes only in the
real unplug case on a global mutex. That serialization affects only
tasks on the cpu which should be brought down.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/cpu.h | 7 +--
kernel/cpu.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 122 insertions(+), 4 deletions(-)

--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -221,9 +221,6 @@ static inline void smpboot_thread_init(v
#endif /* CONFIG_SMP */
extern struct bus_type cpu_subsys;

-static inline void pin_current_cpu(void) { }
-static inline void unpin_current_cpu(void) { }
-
#ifdef CONFIG_HOTPLUG_CPU
/* Stop CPUs going up and down. */

@@ -234,6 +231,8 @@ extern bool try_get_online_cpus(void);
extern void put_online_cpus(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
+extern void pin_current_cpu(void);
+extern void unpin_current_cpu(void);
#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
#define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
@@ -252,6 +251,8 @@ static inline void cpu_hotplug_done(void
#define put_online_cpus() do { } while (0)
#define cpu_hotplug_disable() do { } while (0)
#define cpu_hotplug_enable() do { } while (0)
+static inline void pin_current_cpu(void) { }
+static inline void unpin_current_cpu(void) { }
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
/* These aren't inline functions due to a GCC bug. */
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -88,6 +88,100 @@ static struct {
#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)

+struct hotplug_pcp {
+ struct task_struct *unplug;
+ int refcount;
+ struct completion synced;
+};
+
+static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
+
+/**
+ * pin_current_cpu - Prevent the current cpu from being unplugged
+ *
+ * Lightweight version of get_online_cpus() to prevent cpu from being
+ * unplugged when code runs in a migration disabled region.
+ *
+ * Must be called with preemption disabled (preempt_count = 1)!
+ */
+void pin_current_cpu(void)
+{
+ struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
+
+retry:
+ if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
+ hp->unplug == current) {
+ hp->refcount++;
+ return;
+ }
+ preempt_enable();
+ mutex_lock(&cpu_hotplug.lock);
+ mutex_unlock(&cpu_hotplug.lock);
+ preempt_disable();
+ goto retry;
+}
+
+/**
+ * unpin_current_cpu - Allow unplug of current cpu
+ *
+ * Must be called with preemption or interrupts disabled!
+ */
+void unpin_current_cpu(void)
+{
+ struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
+
+ WARN_ON(hp->refcount <= 0);
+
+ /* This is safe. sync_unplug_thread is pinned to this cpu */
+ if (!--hp->refcount && hp->unplug && hp->unplug != current)
+ wake_up_process(hp->unplug);
+}
+
+/*
+ * FIXME: Is this really correct under all circumstances ?
+ */
+static int sync_unplug_thread(void *data)
+{
+ struct hotplug_pcp *hp = data;
+
+ preempt_disable();
+ hp->unplug = current;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (hp->refcount) {
+ schedule_preempt_disabled();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+ set_current_state(TASK_RUNNING);
+ preempt_enable();
+ complete(&hp->synced);
+ return 0;
+}
+
+/*
+ * Start the sync_unplug_thread on the target cpu and wait for it to
+ * complete.
+ */
+static int cpu_unplug_begin(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+ struct task_struct *tsk;
+
+ init_completion(&hp->synced);
+ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
+ if (IS_ERR(tsk))
+ return (PTR_ERR(tsk));
+ kthread_bind(tsk, cpu);
+ wake_up_process(tsk);
+ wait_for_completion(&hp->synced);
+ return 0;
+}
+
+static void cpu_unplug_done(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+ hp->unplug = NULL;
+}

void get_online_cpus(void)
{
@@ -349,13 +443,14 @@ static int __ref take_cpu_down(void *_pa
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
- int err, nr_calls = 0;
+ int mycpu, err, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.mod = mod,
.hcpu = hcpu,
};
+ cpumask_var_t cpumask;

if (num_online_cpus() == 1)
return -EBUSY;
@@ -363,7 +458,27 @@ static int __ref _cpu_down(unsigned int
if (!cpu_online(cpu))
return -EINVAL;

+ /* Move the downtaker off the unplug cpu */
+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
+ set_cpus_allowed_ptr(current, cpumask);
+ free_cpumask_var(cpumask);
+ preempt_disable();
+ mycpu = smp_processor_id();
+ if (mycpu == cpu) {
+ printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
+ preempt_enable();
+ return -EBUSY;
+ }
+ preempt_enable();
+
cpu_hotplug_begin();
+ err = cpu_unplug_begin(cpu);
+ if (err) {
+ printk("cpu_unplug_begin(%d) failed\n", cpu);
+ goto out_cancel;
+ }

err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
@@ -427,6 +542,8 @@ static int __ref _cpu_down(unsigned int
check_for_tasks(cpu);

out_release:
+ cpu_unplug_done(cpu);
+out_cancel:
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
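The intended use of the pair added above is to bracket a region in which a task relies on staying on its CPU: pin_current_cpu() either takes a cheap per-CPU refcount or, if an unplug is in flight, briefly blocks on the hotplug mutex until the unplug has passed. A usage sketch with a hypothetical caller (in the -rt tree the real callers are migrate_disable()/migrate_enable()):

static void do_percpu_work_pinned(void)
{
	preempt_disable();
	pin_current_cpu();	/* this cpu can no longer be unplugged */
	/* ... work that depends on staying on this cpu ... */
	unpin_current_cpu();	/* may wake a waiting sync_unplug thread */
	preempt_enable();
}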
@@ -1,25 +0,0 @@
Subject: hotplug: sync_unplug: No "\n" in task name
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Sun, 16 Oct 2011 18:56:43 +0800
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Otherwise the output will look a little odd.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Link: http://lkml.kernel.org/r/1318762607-2261-2-git-send-email-yong.zhang0@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -167,7 +167,7 @@ static int cpu_unplug_begin(unsigned int
struct task_struct *tsk;

init_completion(&hp->synced);
- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
+ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
if (IS_ERR(tsk))
return (PTR_ERR(tsk));
kthread_bind(tsk, cpu);
@@ -1,40 +0,0 @@
Subject: hotplug: Use migrate disable on unplug
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 19:35:29 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Migration needs to be disabled across the unplug handling to make
sure that the unplug thread is off the unplugged cpu.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/cpu.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -466,14 +466,13 @@ static int __ref _cpu_down(unsigned int
cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
set_cpus_allowed_ptr(current, cpumask);
free_cpumask_var(cpumask);
- preempt_disable();
+ migrate_disable();
mycpu = smp_processor_id();
if (mycpu == cpu) {
printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
- preempt_enable();
+ migrate_enable();
return -EBUSY;
}
- preempt_enable();

cpu_hotplug_begin();
err = cpu_unplug_begin(cpu);
@@ -546,6 +545,7 @@ static int __ref _cpu_down(unsigned int
out_release:
cpu_unplug_done(cpu);
out_cancel:
+ migrate_enable();
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
@@ -1,118 +0,0 @@
From: Yang Shi <yang.shi@windriver.com>
Date: Mon, 16 Sep 2013 14:09:19 -0700
Subject: hrtimer: Move schedule_work call to helper thread
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

When running the LTP leapsec_timer test, the following call trace is caught:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
Preemption disabled at:[<ffffffff810857f3>] cpu_startup_entry+0x133/0x310

CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.10.10-rt3 #2
Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010
ffffffff81c2f800 ffff880076843e40 ffffffff8169918d ffff880076843e58
ffffffff8106db31 ffff88007684b4a0 ffff880076843e70 ffffffff8169d9c0
ffff88007684b4a0 ffff880076843eb0 ffffffff81059da1 0000001876851200
Call Trace:
<IRQ> [<ffffffff8169918d>] dump_stack+0x19/0x1b
[<ffffffff8106db31>] __might_sleep+0xf1/0x170
[<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50
[<ffffffff81059da1>] queue_work_on+0x61/0x100
[<ffffffff81065aa1>] clock_was_set_delayed+0x21/0x30
[<ffffffff810883be>] do_timer+0x40e/0x660
[<ffffffff8108f487>] tick_do_update_jiffies64+0xf7/0x140
[<ffffffff8108fe42>] tick_check_idle+0x92/0xc0
[<ffffffff81044327>] irq_enter+0x57/0x70
[<ffffffff816a040e>] smp_apic_timer_interrupt+0x3e/0x9b
[<ffffffff8169f80a>] apic_timer_interrupt+0x6a/0x70
<EOI> [<ffffffff8155ea1c>] ? cpuidle_enter_state+0x4c/0xc0
[<ffffffff8155eb68>] cpuidle_idle_call+0xd8/0x2d0
[<ffffffff8100b59e>] arch_cpu_idle+0xe/0x30
[<ffffffff8108585e>] cpu_startup_entry+0x19e/0x310
[<ffffffff8168efa2>] start_secondary+0x1ad/0x1b0

clock_was_set_delayed() is called in the hard IRQ handler (timer interrupt), which
calls schedule_work.

Under PREEMPT_RT_FULL, schedule_work takes spinlocks which could sleep, so it's
not safe to call schedule_work in interrupt context.

Reference upstream commit b68d61c705ef02384c0538b8d9374545097899ca
(rt,ntp: Move call to schedule_delayed_work() to helper thread)
from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git, which
makes a similar change.

Add a helper thread which does the call to schedule_work, and wake up that
thread instead of calling schedule_work directly.

Signed-off-by: Yang Shi <yang.shi@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -48,6 +48,7 @@
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
+#include <linux/kthread.h>
#include <linux/freezer.h>

#include <asm/uaccess.h>
@@ -713,6 +714,44 @@ static void clock_was_set_work(struct wo

static DECLARE_WORK(hrtimer_work, clock_was_set_work);

+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT can not call schedule_work from real interrupt context.
+ * Need to make a thread to do the real work.
+ */
+static struct task_struct *clock_set_delay_thread;
+static bool do_clock_set_delay;
+
+static int run_clock_set_delay(void *ignore)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (do_clock_set_delay) {
+ do_clock_set_delay = false;
+ schedule_work(&hrtimer_work);
+ }
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+void clock_was_set_delayed(void)
+{
+ do_clock_set_delay = true;
+ /* Make visible before waking up process */
+ smp_wmb();
+ wake_up_process(clock_set_delay_thread);
+}
+
+static __init int create_clock_set_delay_thread(void)
+{
+ clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
+ BUG_ON(!clock_set_delay_thread);
+ return 0;
+}
+early_initcall(create_clock_set_delay_thread);
+#else /* PREEMPT_RT_FULL */
/*
* Called from timekeeping and resume code to reprogramm the hrtimer
* interrupt device on all cpus.
*/
@@ -721,6 +760,7 @@ void clock_was_set_delayed(void)
{
schedule_work(&hrtimer_work);
}
+#endif

#else

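The added code is the standard hardirq-safe deferral idiom: the interrupt path only sets a flag and wakes a kthread, and everything that may take sleeping locks runs in the thread. Reduced to its skeleton, with hypothetical names (the write barrier mirrors the one in the patch and publishes the flag before the wakeup):

static bool deferred_pending;
static struct task_struct *deferral_thread;

/* Callable from hard IRQ context: no locks, only flag + wakeup. */
void request_deferred_work(void)
{
	deferred_pending = true;
	smp_wmb();	/* publish the flag before waking the thread */
	wake_up_process(deferral_thread);
}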
@ -1,463 +0,0 @@
|
|||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 3 Jul 2009 08:44:31 -0500
|
||||
Subject: hrtimer: Fixup hrtimer callback changes for preempt-rt
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
In preempt-rt we can not call the callbacks which take sleeping locks
|
||||
from the timer interrupt context.
|
||||
|
||||
Bring back the softirq split for now, until we fixed the signal
|
||||
delivery problem for real.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
|
||||
---
|
||||
include/linux/hrtimer.h | 3
|
||||
kernel/sched/core.c | 1
|
||||
kernel/sched/rt.c | 1
|
||||
kernel/time/hrtimer.c | 219 +++++++++++++++++++++++++++++++++++++++++------
|
||||
kernel/time/tick-sched.c | 1
|
||||
kernel/watchdog.c | 1
|
||||
6 files changed, 200 insertions(+), 26 deletions(-)
|
||||
|
||||
--- a/include/linux/hrtimer.h
|
||||
+++ b/include/linux/hrtimer.h
|
||||
@@ -111,6 +111,8 @@ struct hrtimer {
|
||||
enum hrtimer_restart (*function)(struct hrtimer *);
|
||||
struct hrtimer_clock_base *base;
|
||||
unsigned long state;
|
||||
+ struct list_head cb_entry;
|
||||
+ int irqsafe;
|
||||
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
|
||||
ktime_t praecox;
|
||||
#endif
|
||||
@@ -150,6 +152,7 @@ struct hrtimer_clock_base {
|
||||
int index;
|
||||
clockid_t clockid;
|
||||
struct timerqueue_head active;
|
||||
+ struct list_head expired;
|
||||
ktime_t resolution;
|
||||
ktime_t (*get_time)(void);
|
||||
ktime_t softirq_time;
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -461,6 +461,7 @@ static void init_rq_hrtick(struct rq *rq
|
||||
|
||||
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
rq->hrtick_timer.function = hrtick;
|
||||
+ rq->hrtick_timer.irqsafe = 1;
|
||||
}
|
||||
#else /* CONFIG_SCHED_HRTICK */
|
||||
static inline void hrtick_clear(struct rq *rq)
|
||||
--- a/kernel/sched/rt.c
|
||||
+++ b/kernel/sched/rt.c
|
||||
@@ -44,6 +44,7 @@ void init_rt_bandwidth(struct rt_bandwid
|
||||
|
||||
hrtimer_init(&rt_b->rt_period_timer,
|
||||
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
+ rt_b->rt_period_timer.irqsafe = 1;
|
||||
rt_b->rt_period_timer.function = sched_rt_period_timer;
|
||||
}
|
||||
|
||||
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -577,8 +577,7 @@ static int hrtimer_reprogram(struct hrti
 	 * When the callback is running, we do not reprogram the clock event
 	 * device. The timer callback is either running on a different CPU or
	 * the callback is executed in the hrtimer_interrupt context. The
-	 * reprogramming is handled either by the softirq, which called the
-	 * callback or at the end of the hrtimer_interrupt.
+	 * reprogramming is handled at the end of the hrtimer_interrupt.
 	 */
 	if (hrtimer_callback_running(timer))
 		return 0;
@@ -622,6 +621,9 @@ static int hrtimer_reprogram(struct hrti
 	return res;
 }
 
+static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
+static int hrtimer_rt_defer(struct hrtimer *timer);
+
 /*
  * Initialize the high resolution related parts of cpu_base
  */
@@ -631,6 +633,21 @@ static inline void hrtimer_init_hres(str
 	base->hres_active = 0;
 }
 
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
+{
+	if (!hrtimer_reprogram(timer, base))
+		return 0;
+	if (!wakeup)
+		return -ETIME;
+#ifdef CONFIG_PREEMPT_RT_BASE
+	if (!hrtimer_rt_defer(timer))
+		return -ETIME;
+#endif
+	return 1;
+}
+
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
 	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
@@ -712,6 +729,13 @@ static inline int hrtimer_is_hres_enable
 static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void
 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
+{
+	return 0;
+}
+
 static inline int hrtimer_reprogram(struct hrtimer *timer,
 				    struct hrtimer_clock_base *base)
 {
@@ -719,7 +743,6 @@ static inline int hrtimer_reprogram(stru
 }
 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 static inline void retrigger_next_event(void *arg) { }
-
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
 /*
@@ -854,9 +877,9 @@ void hrtimer_wait_for_timer(const struct
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	if (base && base->cpu_base && !hrtimer_hres_active())
+	if (base && base->cpu_base && !timer->irqsafe)
 		wait_event(base->cpu_base->wait,
-			!(timer->state & HRTIMER_STATE_CALLBACK));
+			   !(timer->state & HRTIMER_STATE_CALLBACK));
 }
 
 #else
@@ -906,6 +929,11 @@ static void __remove_hrtimer(struct hrti
 	if (!(timer->state & HRTIMER_STATE_ENQUEUED))
 		goto out;
 
+	if (unlikely(!list_empty(&timer->cb_entry))) {
+		list_del_init(&timer->cb_entry);
+		goto out;
+	}
+
 	next_timer = timerqueue_getnext(&base->active);
 	timerqueue_del(&base->active, &timer->node);
 	if (&timer->node == next_timer) {
@@ -1016,15 +1044,26 @@ int __hrtimer_start_range_ns(struct hrti
		 * on dynticks target.
		 */
		wake_up_nohz_cpu(new_base->cpu_base->cpu);
-	} else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) &&
-			hrtimer_reprogram(timer, new_base)) {
+	} else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases)) {
+
+		ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
+		if (ret < 0) {
+			/*
+			 * In case we failed to reprogram the timer (mostly
+			 * because out current timer is already elapsed),
+			 * remove it again and report a failure. This avoids
+			 * stale base->first entries.
+			 */
+			debug_deactivate(timer);
+			__remove_hrtimer(timer, new_base,
+					 timer->state & HRTIMER_STATE_CALLBACK, 0);
+		} else if (ret > 0) {
		/*
		 * Only allow reprogramming if the new base is on this CPU.
		 * (it might still be on another CPU if the timer was pending)
		 *
		 * XXX send_remote_softirq() ?
		 */
-		if (wakeup) {
			/*
			 * We need to drop cpu_base->lock to avoid a
			 * lock ordering issue vs. rq->lock.
@@ -1032,9 +1071,7 @@ int __hrtimer_start_range_ns(struct hrti
			raw_spin_unlock(&new_base->cpu_base->lock);
			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
			local_irq_restore(flags);
-			return ret;
-		} else {
-			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+			return 0;
		}
	}
 
@@ -1189,6 +1226,7 @@ static void __hrtimer_init(struct hrtime
 
 	base = hrtimer_clockid_to_base(clock_id);
 	timer->base = &cpu_base->clock_base[base];
+	INIT_LIST_HEAD(&timer->cb_entry);
 	timerqueue_init(&timer->node);
 
 #ifdef CONFIG_TIMER_STATS
@@ -1272,10 +1310,128 @@ static void __run_hrtimer(struct hrtimer
 	timer->state &= ~HRTIMER_STATE_CALLBACK;
 }
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-
 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
+				 struct hrtimer_clock_base *base)
+{
+	/*
+	 * Note, we clear the callback flag before we requeue the
+	 * timer otherwise we trigger the callback_running() check
+	 * in hrtimer_reprogram().
+	 */
+	timer->state &= ~HRTIMER_STATE_CALLBACK;
+
+	if (restart != HRTIMER_NORESTART) {
+		BUG_ON(hrtimer_active(timer));
+		/*
+		 * Enqueue the timer, if it's the leftmost timer then
+		 * we need to reprogram it.
+		 */
+		if (!enqueue_hrtimer(timer, base))
+			return;
+
+#ifndef CONFIG_HIGH_RES_TIMERS
+	}
+#else
+		if (base->cpu_base->hres_active &&
+		    hrtimer_reprogram(timer, base))
+			goto requeue;
+
+	} else if (hrtimer_active(timer)) {
+		/*
+		 * If the timer was rearmed on another CPU, reprogram
+		 * the event device.
+		 */
+		if (&timer->node == base->active.next &&
+		    base->cpu_base->hres_active &&
+		    hrtimer_reprogram(timer, base))
+			goto requeue;
+	}
+	return;
+
+requeue:
+	/*
+	 * Timer is expired. Thus move it from tree to pending list
+	 * again.
+	 */
+	__remove_hrtimer(timer, base, timer->state, 0);
+	list_add_tail(&timer->cb_entry, &base->expired);
+#endif
+}
+
+/*
+ * The changes in mainline which removed the callback modes from
+ * hrtimer are not yet working with -rt. The non wakeup_process()
+ * based callbacks which involve sleeping locks need to be treated
+ * seperately.
+ */
+static void hrtimer_rt_run_pending(void)
+{
+	enum hrtimer_restart (*fn)(struct hrtimer *);
+	struct hrtimer_cpu_base *cpu_base;
+	struct hrtimer_clock_base *base;
+	struct hrtimer *timer;
+	int index, restart;
+
+	local_irq_disable();
+	cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
+
+	raw_spin_lock(&cpu_base->lock);
+
+	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
+		base = &cpu_base->clock_base[index];
+
+		while (!list_empty(&base->expired)) {
+			timer = list_first_entry(&base->expired,
+						 struct hrtimer, cb_entry);
+
+			/*
+			 * Same as the above __run_hrtimer function
+			 * just we run with interrupts enabled.
+			 */
+			debug_hrtimer_deactivate(timer);
+			__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+			timer_stats_account_hrtimer(timer);
+			fn = timer->function;
+
+			raw_spin_unlock_irq(&cpu_base->lock);
+			restart = fn(timer);
+			raw_spin_lock_irq(&cpu_base->lock);
+
+			hrtimer_rt_reprogram(restart, timer, base);
+		}
+	}
+
+	raw_spin_unlock_irq(&cpu_base->lock);
+
+	wake_up_timer_waiters(cpu_base);
+}
+
+static int hrtimer_rt_defer(struct hrtimer *timer)
+{
+	if (timer->irqsafe)
+		return 0;
+
+	__remove_hrtimer(timer, timer->base, timer->state, 0);
+	list_add_tail(&timer->cb_entry, &timer->base->expired);
+	return 1;
+}
+
+#else
+
+static inline void hrtimer_rt_run_pending(void)
+{
+	hrtimer_peek_ahead_timers();
+}
+
+static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
+
+#endif
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1284,7 +1440,7 @@ void hrtimer_interrupt(struct clock_even
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t expires_next, now, entry_time, delta;
-	int i, retries = 0;
+	int i, retries = 0, raise = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1343,7 +1499,10 @@ void hrtimer_interrupt(struct clock_even
			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
				break;
 
-			__run_hrtimer(timer, &basenow);
+			if (!hrtimer_rt_defer(timer))
+				__run_hrtimer(timer, &basenow);
+			else
+				raise = 1;
		}
	}
	/* Reevaluate the clock bases for the next expiry */
@@ -1360,6 +1519,10 @@ void hrtimer_interrupt(struct clock_even
	if (expires_next.tv64 == KTIME_MAX ||
	    !tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
+
+		if (raise)
+			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
		return;
	}
 
@@ -1439,18 +1602,18 @@ void hrtimer_peek_ahead_timers(void)
	__hrtimer_peek_ahead_timers();
	local_irq_restore(flags);
 }
-
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-	hrtimer_peek_ahead_timers();
-}
-
 #else /* CONFIG_HIGH_RES_TIMERS */
 
 static inline void __hrtimer_peek_ahead_timers(void) { }
 
 #endif /* !CONFIG_HIGH_RES_TIMERS */
 
+
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+	hrtimer_rt_run_pending();
+}
+
 /*
  * Called from timer softirq every jiffy, expire hrtimers:
  *
@@ -1483,7 +1646,7 @@ void hrtimer_run_queues(void)
 	struct timerqueue_node *node;
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	int index, gettime = 1;
+	int index, gettime = 1, raise = 0;
 
 	if (hrtimer_hres_active())
 		return;
@@ -1508,12 +1671,16 @@ void hrtimer_run_queues(void)
			    hrtimer_get_expires_tv64(timer))
				break;
 
-			__run_hrtimer(timer, &base->softirq_time);
+			if (!hrtimer_rt_defer(timer))
+				__run_hrtimer(timer, &base->softirq_time);
+			else
+				raise = 1;
		}
		raw_spin_unlock(&cpu_base->lock);
	}
 
-	wake_up_timer_waiters(cpu_base);
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
 
 /*
@@ -1535,6 +1702,7 @@ static enum hrtimer_restart hrtimer_wake
 void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 {
 	sl->timer.function = hrtimer_wakeup;
+	sl->timer.irqsafe = 1;
 	sl->task = task;
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
@@ -1671,6 +1839,7 @@ static void init_hrtimers_cpu(int cpu)
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);
+		INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
 	}
 
 	cpu_base->cpu = cpu;
@@ -1783,9 +1952,7 @@ void __init hrtimers_init(void)
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
 	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
 }
 
 /**
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1159,6 +1159,7 @@ void tick_setup_sched_timer(void)
	 * Emulate tick processing via per-CPU hrtimers:
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	ts->sched_timer.irqsafe = 1;
	ts->sched_timer.function = tick_sched_timer;
 
	/* Get the next period (per cpu) */
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -454,6 +454,7 @@ static void watchdog_enable(unsigned int
	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
+	hrtimer->irqsafe = 1;
 
	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

@@ -1,38 +0,0 @@
Subject: hrtimer: Raise softirq if hrtimer irq stalled
From: Watanabe <shunsuke.watanabe@tel.com>
Date: Sun, 28 Oct 2012 11:13:44 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

When the hrtimer stall detection hits, the softirq is not raised.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
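
Schematically, the fix routes the early bail-out through the same exit
label as the slow path, so the deferred-callback softirq is raised in
both cases. A condensed view of the hunks below (not verbatim):

	if (expires_next.tv64 == KTIME_MAX ||
	    !tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
		goto out;	/* previously: return, skipping the raise */
	}
	/* ... stall/hang handling ... */
out:
	if (raise)
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);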

---
 kernel/time/hrtimer.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1519,11 +1519,7 @@ void hrtimer_interrupt(struct clock_even
	if (expires_next.tv64 == KTIME_MAX ||
	    !tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
-
-		if (raise)
-			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-
-		return;
+		goto out;
	}
 
	/*
@@ -1567,6 +1563,9 @@ void hrtimer_interrupt(struct clock_even
	tick_program_event(expires_next, 1);
	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
		    ktime_to_ns(delta));
+out:
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
 
 /*

@@ -1,196 +0,0 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:34 -0500
Subject: hrtimers: Prepare full preemption
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Make cancellation of a running callback in softirq context safe
against preemption.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
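
The cancellation path this makes safe looks, in condensed form, like the
loop below; a sketch simplified from the hrtimer_cancel() hunk, where on
!RT configurations hrtimer_wait_for_timer() degrades to cpu_relax():

	int hrtimer_cancel(struct hrtimer *timer)
	{
		for (;;) {
			int ret = hrtimer_try_to_cancel(timer);

			if (ret >= 0)
				return ret;
			/*
			 * The callback is running: sleep on the per-base
			 * waitqueue instead of spinning, so a preempted
			 * softirq callback can finish and wake us.
			 */
			hrtimer_wait_for_timer(timer);
		}
	}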

---
 include/linux/hrtimer.h    | 10 ++++++++++
 kernel/time/hrtimer.c      | 33 ++++++++++++++++++++++++++++++++-
 kernel/time/itimer.c       |  1 +
 kernel/time/posix-timers.c | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)

--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -197,6 +197,9 @@ struct hrtimer_cpu_base {
	unsigned long			nr_hangs;
	ktime_t				max_hang_time;
 #endif
+#ifdef CONFIG_PREEMPT_RT_BASE
+	wait_queue_head_t		wait;
+#endif
	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 };
 
@@ -384,6 +387,13 @@ static inline int hrtimer_restart(struct
	return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
+/* Softirq preemption could deadlock timer removal */
+#ifdef CONFIG_PREEMPT_RT_BASE
+  extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
+#else
+# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
+#endif
+
 /* Query timers: */
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -837,6 +837,32 @@ u64 hrtimer_forward(struct hrtimer *time
 }
 EXPORT_SYMBOL_GPL(hrtimer_forward);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define wake_up_timer_waiters(b)	wake_up(&(b)->wait)
+
+/**
+ * hrtimer_wait_for_timer - Wait for a running timer
+ *
+ * @timer:	timer to wait for
+ *
+ * The function waits in case the timers callback function is
+ * currently executed on the waitqueue of the timer base. The
+ * waitqueue is woken up after the timer callback function has
+ * finished execution.
+ */
+void hrtimer_wait_for_timer(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (base && base->cpu_base && !hrtimer_hres_active())
+		wait_event(base->cpu_base->wait,
+			   !(timer->state & HRTIMER_STATE_CALLBACK));
+}
+
+#else
+# define wake_up_timer_waiters(b)	do { } while (0)
+#endif
+
 /*
  * enqueue_hrtimer - internal function to (re)start a timer
  *
@@ -1099,7 +1125,7 @@ int hrtimer_cancel(struct hrtimer *timer
 
		if (ret >= 0)
			return ret;
-		cpu_relax();
+		hrtimer_wait_for_timer(timer);
	}
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1486,6 +1512,8 @@ void hrtimer_run_queues(void)
		}
		raw_spin_unlock(&cpu_base->lock);
	}
+
+	wake_up_timer_waiters(cpu_base);
 }
 
 /*
@@ -1647,6 +1675,9 @@ static void init_hrtimers_cpu(int cpu)
 
	cpu_base->cpu = cpu;
	hrtimer_init_hres(cpu_base);
+#ifdef CONFIG_PREEMPT_RT_BASE
+	init_waitqueue_head(&cpu_base->wait);
+#endif
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -213,6 +213,7 @@ int do_setitimer(int which, struct itime
	/* We are sharing ->siglock with it_real_fn() */
	if (hrtimer_try_to_cancel(timer) < 0) {
		spin_unlock_irq(&tsk->sighand->siglock);
+		hrtimer_wait_for_timer(&tsk->signal->real_timer);
		goto again;
	}
	expires = timeval_to_ktime(value->it_value);
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -821,6 +821,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_
	return overrun;
 }
 
+/*
+ * Protected by RCU!
+ */
+static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
+{
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (kc->timer_set == common_timer_set)
+		hrtimer_wait_for_timer(&timr->it.real.timer);
+	else
+		/* FIXME: Whacky hack for posix-cpu-timers */
+		schedule_timeout(1);
+#endif
+}
+
 /* Set a POSIX.1b interval timer. */
 /* timr->it_lock is taken. */
 static int
@@ -898,6 +912,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t,
	if (!timr)
		return -EINVAL;
 
+	rcu_read_lock();
	kc = clockid_to_kclock(timr->it_clock);
	if (WARN_ON_ONCE(!kc || !kc->timer_set))
		error = -EINVAL;
@@ -906,9 +921,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t,
 
	unlock_timer(timr, flag);
	if (error == TIMER_RETRY) {
+		timer_wait_for_callback(kc, timr);
		rtn = NULL;	// We already got the old time...
+		rcu_read_unlock();
		goto retry;
	}
+	rcu_read_unlock();
 
	if (old_setting && !error &&
	    copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
@@ -946,10 +964,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t
	if (!timer)
		return -EINVAL;
 
+	rcu_read_lock();
	if (timer_delete_hook(timer) == TIMER_RETRY) {
		unlock_timer(timer, flags);
+		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+					timer);
+		rcu_read_unlock();
		goto retry_delete;
	}
+	rcu_read_unlock();
 
	spin_lock(&current->sighand->siglock);
	list_del(&timer->list);
@@ -975,8 +998,18 @@ static void itimer_delete(struct k_itime
 retry_delete:
	spin_lock_irqsave(&timer->it_lock, flags);
 
+	/* On RT we can race with a deletion */
+	if (!timer->it_signal) {
+		unlock_timer(timer, flags);
+		return;
+	}
+
	if (timer_delete_hook(timer) == TIMER_RETRY) {
+		rcu_read_lock();
		unlock_timer(timer, flags);
+		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+					timer);
+		rcu_read_unlock();
		goto retry_delete;
	}
	list_del(&timer->list);

@@ -1,26 +0,0 @@
From: Mike Galbraith <bitbucket@online.de>
Date: Fri, 30 Aug 2013 07:57:25 +0200
Subject: hwlat-detector: Don't ignore threshold module parameter
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

If the user specified a threshold at module load time, use it.

Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
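
The one-liner below relies on GCC's conditional-expression extension,
where `a ?: b` evaluates to `a` when `a` is non-zero and to `b` otherwise;
an equivalent spelling without the extension, for illustration, would be:

	/* illustrative equivalent of "threshold ?: DEFAULT_LAT_THRESHOLD" */
	data.threshold = threshold ? threshold : DEFAULT_LAT_THRESHOLD;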

---
 drivers/misc/hwlat_detector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -414,7 +414,7 @@ static int init_stats(void)
		goto out;
 
	__reset_stats();
-	data.threshold = DEFAULT_LAT_THRESHOLD;	    /* threshold us */
+	data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
	data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
	data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */
 

@@ -1,126 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 19 Aug 2013 17:33:25 -0400
Subject: hwlat-detector: Update hwlat_detector to add outer loop detection
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The hwlat_detector reads two timestamps in a row, then reports any
gap between those calls. The problem is, it misses everything between
the second reading of the time stamp and the first reading of the time
stamp in the next loop. That's where most of the time is spent, which
means chances are likely that it will miss all hardware latencies. This
defeats the purpose.

By also testing the gap from the previous loop's second time stamp to
the current loop's first time stamp (the outer loop), we are more
likely to find a latency.

Setting the threshold to 1, here's what the report now looks like:

1347415723.0232202770	0	2
1347415725.0234202822	0	2
1347415727.0236202875	0	2
1347415729.0238202928	0	2
1347415731.0240202980	0	2
1347415734.0243203061	0	2
1347415736.0245203113	0	2
1347415738.0247203166	2	0
1347415740.0249203219	0	3
1347415742.0251203272	0	3
1347415743.0252203299	0	3
1347415745.0254203351	0	2
1347415747.0256203404	0	2
1347415749.0258203457	0	2
1347415751.0260203510	0	2
1347415754.0263203589	0	2
1347415756.0265203642	0	2
1347415758.0267203695	0	2
1347415760.0269203748	0	2
1347415762.0271203801	0	2
1347415764.0273203853	2	0

There's some hardware latency that takes 2 microseconds to run.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
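
A condensed sketch of the sampling idea: the inner gap is between two
back-to-back clock reads, the outer gap runs from the previous
iteration's second read to the current iteration's first read. Variable
names here are local to the sketch, not the driver's:

	static void sample_once(ktime_t *last_t2, u64 *inner_max, u64 *outer_max)
	{
		ktime_t t1 = ktime_get();	/* closes the outer gap */
		ktime_t t2 = ktime_get();	/* inner gap is t2 - t1 */
		s64 diff;

		if (last_t2->tv64) {
			/* outer loop: previous t2 up to this t1 */
			diff = ktime_to_us(ktime_sub(t1, *last_t2));
			if (diff > *outer_max)
				*outer_max = diff;
		}
		diff = ktime_to_us(ktime_sub(t2, t1));	/* inner loop */
		if (diff > *inner_max)
			*inner_max = diff;
		*last_t2 = t2;
	}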

---
 drivers/misc/hwlat_detector.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -143,6 +143,7 @@ static void detector_exit(void);
 struct sample {
	u64		seqnum;		/* unique sequence */
	u64		duration;	/* ktime delta */
+	u64		outer_duration;	/* ktime delta (outer loop) */
	struct timespec	timestamp;	/* wall time */
	unsigned long   lost;
 };
@@ -219,11 +220,13 @@ static struct sample *buffer_get_sample(
  */
 static int get_sample(void *unused)
 {
-	ktime_t start, t1, t2;
+	ktime_t start, t1, t2, last_t2;
	s64 diff, total = 0;
	u64 sample = 0;
+	u64 outer_sample = 0;
	int ret = 1;
 
+	last_t2.tv64 = 0;
	start = ktime_get(); /* start timestamp */
 
	do {
@@ -231,7 +234,22 @@ static int get_sample(void *unused)
		t1 = ktime_get();	/* we'll look for a discontinuity */
		t2 = ktime_get();
 
+		if (last_t2.tv64) {
+			/* Check the delta from outer loop (t2 to next t1) */
+			diff = ktime_to_us(ktime_sub(t1, last_t2));
+			/* This shouldn't happen */
+			if (diff < 0) {
+				pr_err(BANNER "time running backwards\n");
+				goto out;
+			}
+			if (diff > outer_sample)
+				outer_sample = diff;
+		}
+		last_t2 = t2;
+
		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
+
+		/* This checks the inner loop (t1 to t2) */
		diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */
 
		/* This shouldn't happen */
@@ -246,12 +264,13 @@ static int get_sample(void *unused)
	} while (total <= data.sample_width);
 
	/* If we exceed the threshold value, we have found a hardware latency */
-	if (sample > data.threshold) {
+	if (sample > data.threshold || outer_sample > data.threshold) {
		struct sample s;
 
		data.count++;
		s.seqnum = data.count;
		s.duration = sample;
+		s.outer_duration = outer_sample;
		s.timestamp = CURRENT_TIME;
		__buffer_add_sample(&s);
 
@@ -738,10 +757,11 @@ static ssize_t debug_sample_fread(struct
		}
	}
 
-	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
-		       sample->timestamp.tv_sec,
-		       sample->timestamp.tv_nsec,
-		       sample->duration);
+	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
+		       sample->timestamp.tv_sec,
+		       sample->timestamp.tv_nsec,
+		       sample->duration,
+		       sample->outer_duration);
 
 
	/* handling partial reads is more trouble than it's worth */

@@ -1,184 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 19 Aug 2013 17:33:27 -0400
Subject: hwlat-detector: Use thread instead of stop machine
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

There's no reason to use stop machine to search for hardware latency.
Simply disabling interrupts while running the loop will do enough to
check if something comes in that wasn't disabled by interrupts being
off, which is exactly what stop machine does.

Instead of using stop machine, just have the thread disable interrupts
while it checks for hardware latency.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
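
After the change, the sampling thread's main loop reduces to roughly the
following (condensed from the kthread_fn() hunk below):

	while (!kthread_should_stop()) {
		u64 interval;

		mutex_lock(&data.lock);

		local_irq_disable();	/* replaces stop_machine() */
		ret = get_sample();
		local_irq_enable();

		if (ret > 0)
			wake_up(&data.wq);	/* wake up reader(s) */

		interval = data.sample_window - data.sample_width;
		do_div(interval, USEC_PER_MSEC);
		mutex_unlock(&data.lock);

		if (msleep_interruptible(interval))
			break;
	}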

---
 drivers/misc/hwlat_detector.c | 60 ++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 34 deletions(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ring_buffer.h>
-#include <linux/stop_machine.h>
 #include <linux/time.h>
 #include <linux/hrtimer.h>
 #include <linux/kthread.h>
@@ -107,7 +106,6 @@ struct data;	/* Global state */
 /* Sampling functions */
 static int __buffer_add_sample(struct sample *sample);
 static struct sample *buffer_get_sample(struct sample *sample);
-static int get_sample(void *unused);
 
 /* Threading and state */
 static int kthread_fn(void *unused);
@@ -149,7 +147,7 @@ struct sample {
	unsigned long   lost;
 };
 
-/* keep the global state somewhere. Mostly used under stop_machine. */
+/* keep the global state somewhere. */
 static struct data {
 
	struct mutex lock;		/* protect changes */
@@ -172,7 +170,7 @@ static struct data {
  * @sample: The new latency sample value
  *
  * This receives a new latency sample and records it in a global ring buffer.
- * No additional locking is used in this case - suited for stop_machine use.
+ * No additional locking is used in this case.
  */
 static int __buffer_add_sample(struct sample *sample)
 {
@@ -229,18 +227,18 @@ static struct sample *buffer_get_sample(
 #endif
 /**
  * get_sample - sample the CPU TSC and look for likely hardware latencies
- * @unused: This is not used but is a part of the stop_machine API
  *
  * Used to repeatedly capture the CPU TSC (or similar), looking for potential
- * hardware-induced latency. Called under stop_machine, with data.lock held.
+ * hardware-induced latency. Called with interrupts disabled and with
+ * data.lock held.
  */
-static int get_sample(void *unused)
+static int get_sample(void)
 {
	time_type start, t1, t2, last_t2;
	s64 diff, total = 0;
	u64 sample = 0;
	u64 outer_sample = 0;
-	int ret = 1;
+	int ret = -1;
 
	init_time(last_t2, 0);
	start = time_get(); /* start timestamp */
@@ -279,10 +277,14 @@ static int get_sample(void *unused)
 
	} while (total <= data.sample_width);
 
+	ret = 0;
+
	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > data.threshold || outer_sample > data.threshold) {
		struct sample s;
 
+		ret = 1;
+
		data.count++;
		s.seqnum = data.count;
		s.duration = sample;
@@ -295,7 +297,6 @@ static int get_sample(void *unused)
			data.max_sample = sample;
	}
 
-	ret = 0;
 out:
	return ret;
 }
@@ -305,32 +306,30 @@ static int get_sample(void *unused)
  * @unused: A required part of the kthread API.
  *
  * Used to periodically sample the CPU TSC via a call to get_sample. We
- * use stop_machine, whith does (intentionally) introduce latency since we
+ * disable interrupts, which does (intentionally) introduce latency since we
  * need to ensure nothing else might be running (and thus pre-empting).
  * Obviously this should never be used in production environments.
  *
- * stop_machine will schedule us typically only on CPU0 which is fine for
- * almost every real-world hardware latency situation - but we might later
- * generalize this if we find there are any actualy systems with alternate
- * SMI delivery or other non CPU0 hardware latencies.
+ * Currently this runs on which ever CPU it was scheduled on, but most
+ * real-worald hardware latency situations occur across several CPUs,
+ * but we might later generalize this if we find there are any actualy
+ * systems with alternate SMI delivery or other hardware latencies.
  */
 static int kthread_fn(void *unused)
 {
-	int err = 0;
-	u64 interval = 0;
+	int ret;
+	u64 interval;
 
	while (!kthread_should_stop()) {
 
		mutex_lock(&data.lock);
 
-		err = stop_machine(get_sample, unused, 0);
-		if (err) {
-			/* Houston, we have a problem */
-			mutex_unlock(&data.lock);
-			goto err_out;
-		}
+		local_irq_disable();
+		ret = get_sample();
+		local_irq_enable();
 
-		wake_up(&data.wq); /* wake up reader(s) */
+		if (ret > 0)
+			wake_up(&data.wq); /* wake up reader(s) */
 
		interval = data.sample_window - data.sample_width;
		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
@@ -338,15 +337,10 @@ static int kthread_fn(void *unused)
		mutex_unlock(&data.lock);
 
		if (msleep_interruptible(interval))
-			goto out;
+			break;
	}
-	goto out;
-err_out:
-	pr_err(BANNER "could not call stop_machine, disabling\n");
-	enabled = 0;
-out:
-	return err;
 
+	return 0;
 }
 
 /**
@@ -442,8 +436,7 @@ static int init_stats(void)
  * This function provides a generic read implementation for the global state
  * "data" structure debugfs filesystem entries. It would be nice to use
  * simple_attr_read directly, but we need to make sure that the data.lock
- * spinlock is held during the actual read (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual read.
  */
 static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry)
@@ -478,8 +471,7 @@ static ssize_t simple_data_read(struct f
  * This function provides a generic write implementation for the global state
  * "data" structure debugfs filesystem entries. It would be nice to use
  * simple_attr_write directly, but we need to make sure that the data.lock
- * spinlock is held during the actual write (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual write.
  */
 static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry)

@@ -1,93 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 19 Aug 2013 17:33:26 -0400
Subject: hwlat-detector: Use trace_clock_local if available
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As ktime_get() calls into the timing code, which does a read_seq(), it
may be affected by other CPUs that touch that lock. To remove this
dependency, use the trace_clock_local() which is already exported
for module use. If CONFIG_TRACING is enabled, use that as the clock,
otherwise use ktime_get().

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
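
For background, the read_seq() dependency is the usual seqcount reader
loop: a reader retries whenever a writer on another CPU has touched the
sequence count, which is what couples ktime_get() to other CPUs.
Illustratively (names here are local to this sketch, not the
timekeeping core's):

	static seqcount_t demo_seq;
	static u64 demo_ns;

	static u64 demo_read(void)
	{
		unsigned int seq;
		u64 val;

		do {
			seq = read_seqcount_begin(&demo_seq);
			val = demo_ns;
		} while (read_seqcount_retry(&demo_seq, seq));

		return val;
	}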

---
 drivers/misc/hwlat_detector.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -51,6 +51,7 @@
 #include <linux/version.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/trace_clock.h>
 
 #define BUF_SIZE_DEFAULT	262144UL	/* 8K*(sizeof(entry)) */
 #define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
@@ -211,6 +212,21 @@ static struct sample *buffer_get_sample(
	return sample;
 }
 
+#ifndef CONFIG_TRACING
+#define time_type	ktime_t
+#define time_get()	ktime_get()
+#define time_to_us(x)	ktime_to_us(x)
+#define time_sub(a, b)	ktime_sub(a, b)
+#define init_time(a, b)	(a).tv64 = b
+#define time_u64(a)	((a).tv64)
+#else
+#define time_type	u64
+#define time_get()	trace_clock_local()
+#define time_to_us(x)	div_u64(x, 1000)
+#define time_sub(a, b)	((a) - (b))
+#define init_time(a, b)	(a = b)
+#define time_u64(a)	a
+#endif
 /**
  * get_sample - sample the CPU TSC and look for likely hardware latencies
  * @unused: This is not used but is a part of the stop_machine API
@@ -220,23 +236,23 @@ static struct sample *buffer_get_sample(
  */
 static int get_sample(void *unused)
 {
-	ktime_t start, t1, t2, last_t2;
+	time_type start, t1, t2, last_t2;
	s64 diff, total = 0;
	u64 sample = 0;
	u64 outer_sample = 0;
	int ret = 1;
 
-	last_t2.tv64 = 0;
-	start = ktime_get(); /* start timestamp */
+	init_time(last_t2, 0);
+	start = time_get(); /* start timestamp */
 
	do {
 
-		t1 = ktime_get();	/* we'll look for a discontinuity */
-		t2 = ktime_get();
+		t1 = time_get();	/* we'll look for a discontinuity */
+		t2 = time_get();
 
-		if (last_t2.tv64) {
+		if (time_u64(last_t2)) {
			/* Check the delta from outer loop (t2 to next t1) */
-			diff = ktime_to_us(ktime_sub(t1, last_t2));
+			diff = time_to_us(time_sub(t1, last_t2));
			/* This shouldn't happen */
			if (diff < 0) {
				pr_err(BANNER "time running backwards\n");
@@ -247,10 +263,10 @@ static int get_sample(void *unused)
		}
		last_t2 = t2;
 
-		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
+		total = time_to_us(time_sub(t2, start)); /* sample width */
 
		/* This checks the inner loop (t1 to t2) */
-		diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */
+		diff = time_to_us(time_sub(t2, t1)); /* current diff */
 
		/* This shouldn't happen */
		if (diff < 0) {
File diff suppressed because it is too large
Some files were not shown because too many files have changed in this diff