linux/debian/patches/features/all/rt/rt-mutex-add-sleeping-spinl...

639 lines
18 KiB
Diff

Subject: rt-mutex-add-sleeping-spinlocks-support.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Jun 2011 11:21:25 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.0/patches-4.0.5-rt3.tar.xz
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/kernel.h | 4
include/linux/rtmutex.h | 27 ++
include/linux/sched.h | 5
kernel/futex.c | 5
kernel/locking/rtmutex.c | 379 +++++++++++++++++++++++++++++++++++++---
kernel/locking/rtmutex_common.h | 11 +
6 files changed, 401 insertions(+), 30 deletions(-)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -176,6 +176,9 @@ extern int _cond_resched(void);
*/
# define might_sleep() \
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+
+# define might_sleep_no_state_check() \
+ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
# define sched_annotate_sleep() (current->task_state_change = 0)
#else
static inline void ___might_sleep(const char *file, int line,
@@ -183,6 +186,7 @@ extern int _cond_resched(void);
static inline void __might_sleep(const char *file, int line,
int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
+# define might_sleep_no_state_check() do { might_resched(); } while (0)
# define sched_annotate_sleep() do { } while (0)
#endif
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -18,6 +18,10 @@
extern int max_lock_depth; /* for sysctl */
+#ifdef CONFIG_DEBUG_MUTEXES
+#include <linux/debug_locks.h>
+#endif
+
/**
* The rt_mutex structure
*
@@ -31,8 +35,8 @@ struct rt_mutex {
struct rb_root waiters;
struct rb_node *waiters_leftmost;
struct task_struct *owner;
-#ifdef CONFIG_DEBUG_RT_MUTEXES
int save_state;
+#ifdef CONFIG_DEBUG_RT_MUTEXES
const char *name, *file;
int line;
void *magic;
@@ -55,22 +59,33 @@ struct hrtimer_sleeper;
# define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
#endif
+# define rt_mutex_init(mutex) \
+ do { \
+ raw_spin_lock_init(&(mutex)->wait_lock); \
+ __rt_mutex_init(mutex, #mutex); \
+ } while (0)
+
#ifdef CONFIG_DEBUG_RT_MUTEXES
# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
, .name = #mutexname, .file = __FILE__, .line = __LINE__
-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
extern void rt_mutex_debug_task_free(struct task_struct *tsk);
#else
# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
# define rt_mutex_debug_task_free(t) do { } while (0)
#endif
-#define __RT_MUTEX_INITIALIZER(mutexname) \
- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
, .waiters = RB_ROOT \
, .owner = NULL \
- __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
+ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
+
+#define __RT_MUTEX_INITIALIZER(mutexname) \
+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
+
+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
+ , .save_state = 1 }
#define DEFINE_RT_MUTEX(mutexname) \
struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -308,6 +308,11 @@ extern char ___assert_task_state[1 - 2*!
#endif
+#define __set_current_state_no_track(state_value) \
+ do { current->state = (state_value); } while (0)
+#define set_current_state_no_track(state_value) \
+ set_mb(current->state, (state_value))
+
/* Task command name length */
#define TASK_COMM_LEN 16
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2586,10 +2586,7 @@ static int futex_wait_requeue_pi(u32 __u
* The waiter is allocated on our stack, manipulated by the requeue
* code while we sleep on uaddr.
*/
- debug_rt_mutex_init_waiter(&rt_waiter);
- RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
- RB_CLEAR_NODE(&rt_waiter.tree_entry);
- rt_waiter.task = NULL;
+ rt_mutex_init_waiter(&rt_waiter, false);
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -7,6 +7,11 @@
* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
+ * Adaptive Spinlocks:
+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ * and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
*
* See Documentation/locking/rt-mutex-design.txt for details.
*/
@@ -341,6 +346,14 @@ static bool rt_mutex_cond_detect_deadloc
return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}
+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
+{
+ if (waiter->savestate)
+ wake_up_lock_sleeper(waiter->task);
+ else
+ wake_up_process(waiter->task);
+}
+
/*
* Max number of times we'll walk the boosting chain:
*/
@@ -647,13 +660,16 @@ static int rt_mutex_adjust_prio_chain(st
* follow here. This is the end of the chain we are walking.
*/
if (!rt_mutex_owner(lock)) {
+ struct rt_mutex_waiter *lock_top_waiter;
+
/*
* If the requeue [7] above changed the top waiter,
* then we need to wake the new top waiter up to try
* to get the lock.
*/
- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
- wake_up_process(rt_mutex_top_waiter(lock)->task);
+ lock_top_waiter = rt_mutex_top_waiter(lock);
+ if (prerequeue_top_waiter != lock_top_waiter)
+ rt_mutex_wake_waiter(lock_top_waiter);
raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -746,6 +762,25 @@ static int rt_mutex_adjust_prio_chain(st
return ret;
}
+
+#define STEAL_NORMAL 0
+#define STEAL_LATERAL 1
+
+/*
+ * Note that RT tasks are excluded from lateral-steals to prevent the
+ * introduction of an unbounded latency
+ */
+static inline int lock_is_stealable(struct task_struct *task,
+ struct task_struct *pendowner, int mode)
+{
+ if (mode == STEAL_NORMAL || rt_task(task)) {
+ if (task->prio >= pendowner->prio)
+ return 0;
+ } else if (task->prio > pendowner->prio)
+ return 0;
+ return 1;
+}
+
/*
* Try to take an rt-mutex
*
@@ -756,8 +791,9 @@ static int rt_mutex_adjust_prio_chain(st
* @waiter: The waiter that is queued to the lock's wait list if the
* callsite called task_blocked_on_lock(), otherwise NULL
*/
-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
- struct rt_mutex_waiter *waiter)
+static int __try_to_take_rt_mutex(struct rt_mutex *lock,
+ struct task_struct *task,
+ struct rt_mutex_waiter *waiter, int mode)
{
unsigned long flags;
@@ -796,8 +832,10 @@ static int try_to_take_rt_mutex(struct r
* If waiter is not the highest priority waiter of
* @lock, give up.
*/
- if (waiter != rt_mutex_top_waiter(lock))
+ if (waiter != rt_mutex_top_waiter(lock)) {
+ /* XXX lock_is_stealable() ? */
return 0;
+ }
/*
* We can acquire the lock. Remove the waiter from the
@@ -815,14 +853,10 @@ static int try_to_take_rt_mutex(struct r
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
- /*
- * If @task->prio is greater than or equal to
- * the top waiter priority (kernel view),
- * @task lost.
- */
- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
- return 0;
+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
+ if (task != pown && !lock_is_stealable(task, pown, mode))
+ return 0;
/*
* The current top waiter stays enqueued. We
* don't have to change anything in the lock
@@ -871,6 +905,314 @@ static int try_to_take_rt_mutex(struct r
return 1;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * preemptible spin_lock functions:
+ */
+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
+ void (*slowfn)(struct rt_mutex *lock))
+{
+ might_sleep_no_state_check();
+
+ if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
+ rt_mutex_deadlock_account_lock(lock, current);
+ else
+ slowfn(lock);
+}
+
+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
+ void (*slowfn)(struct rt_mutex *lock))
+{
+ if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
+ rt_mutex_deadlock_account_unlock(current);
+ else
+ slowfn(lock);
+}
+#ifdef CONFIG_SMP
+/*
+ * Note that owner is a speculative pointer and dereferencing relies
+ * on rcu_read_lock() and the check against the lock owner.
+ */
+static int adaptive_wait(struct rt_mutex *lock,
+ struct task_struct *owner)
+{
+ int res = 0;
+
+ rcu_read_lock();
+ for (;;) {
+ if (owner != rt_mutex_owner(lock))
+ break;
+ /*
+ * Ensure that owner->on_cpu is dereferenced _after_
+ * checking the above to be valid.
+ */
+ barrier();
+ if (!owner->on_cpu) {
+ res = 1;
+ break;
+ }
+ cpu_relax();
+ }
+ rcu_read_unlock();
+ return res;
+}
+#else
+static int adaptive_wait(struct rt_mutex *lock,
+ struct task_struct *orig_owner)
+{
+ return 1;
+}
+#endif
+
+# define pi_lock(lock) raw_spin_lock_irq(lock)
+# define pi_unlock(lock) raw_spin_unlock_irq(lock)
+
+static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task,
+ enum rtmutex_chainwalk chwalk);
+/*
+ * Slow path lock function spin_lock style: this variant is very
+ * careful not to miss any non-lock wakeups.
+ *
+ * We store the current state under p->pi_lock in p->saved_state and
+ * the try_to_wake_up() code handles this accordingly.
+ */
+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
+{
+ struct task_struct *lock_owner, *self = current;
+ struct rt_mutex_waiter waiter, *top_waiter;
+ int ret;
+
+ rt_mutex_init_waiter(&waiter, true);
+
+ raw_spin_lock(&lock->wait_lock);
+
+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
+ raw_spin_unlock(&lock->wait_lock);
+ return;
+ }
+
+ BUG_ON(rt_mutex_owner(lock) == self);
+
+ /*
+ * We save whatever state the task is in and we'll restore it
+ * after acquiring the lock taking real wakeups into account
+ * as well. We are serialized via pi_lock against wakeups. See
+ * try_to_wake_up().
+ */
+ pi_lock(&self->pi_lock);
+ self->saved_state = self->state;
+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+ pi_unlock(&self->pi_lock);
+
+ ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
+ BUG_ON(ret);
+
+ for (;;) {
+ /* Try to acquire the lock again. */
+ if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
+ break;
+
+ top_waiter = rt_mutex_top_waiter(lock);
+ lock_owner = rt_mutex_owner(lock);
+
+ raw_spin_unlock(&lock->wait_lock);
+
+ debug_rt_mutex_print_deadlock(&waiter);
+
+ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
+ schedule_rt_mutex(lock);
+
+ raw_spin_lock(&lock->wait_lock);
+
+ pi_lock(&self->pi_lock);
+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+ pi_unlock(&self->pi_lock);
+ }
+
+ /*
+ * Restore the task state to current->saved_state. We set it
+ * to the original state above and the try_to_wake_up() code
+ * has possibly updated it when a real (non-rtmutex) wakeup
+ * happened while we were blocked. Clear saved_state so
+ * try_to_wakeup() does not get confused.
+ */
+ pi_lock(&self->pi_lock);
+ __set_current_state_no_track(self->saved_state);
+ self->saved_state = TASK_RUNNING;
+ pi_unlock(&self->pi_lock);
+
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit
+ * unconditionally. We might have to fix that up:
+ */
+ fixup_rt_mutex_waiters(lock);
+
+ BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
+ BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
+
+ raw_spin_unlock(&lock->wait_lock);
+
+ debug_rt_mutex_free_waiter(&waiter);
+}
+
+static void wakeup_next_waiter(struct rt_mutex *lock);
+/*
+ * Slow path to release a rt_mutex spin_lock style
+ */
+static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+{
+ raw_spin_lock(&lock->wait_lock);
+
+ debug_rt_mutex_unlock(lock);
+
+ rt_mutex_deadlock_account_unlock(current);
+
+ if (!rt_mutex_has_waiters(lock)) {
+ lock->owner = NULL;
+ raw_spin_unlock(&lock->wait_lock);
+ return;
+ }
+
+ wakeup_next_waiter(lock);
+
+ raw_spin_unlock(&lock->wait_lock);
+
+ /* Undo pi boosting.when necessary */
+ rt_mutex_adjust_prio(current);
+}
+
+void __lockfunc rt_spin_lock(spinlock_t *lock)
+{
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+}
+EXPORT_SYMBOL(rt_spin_lock);
+
+void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
+{
+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
+}
+EXPORT_SYMBOL(__rt_spin_lock);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
+{
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+}
+EXPORT_SYMBOL(rt_spin_lock_nested);
+#endif
+
+void __lockfunc rt_spin_unlock(spinlock_t *lock)
+{
+ /* NOTE: we always pass in '1' for nested, for simplicity */
+ spin_release(&lock->dep_map, 1, _RET_IP_);
+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
+}
+EXPORT_SYMBOL(rt_spin_unlock);
+
+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
+{
+ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
+}
+EXPORT_SYMBOL(__rt_spin_unlock);
+
+/*
+ * Wait for the lock to get unlocked: instead of polling for an unlock
+ * (like raw spinlocks do), we lock and unlock, to force the kernel to
+ * schedule if there's contention:
+ */
+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
+{
+ spin_lock(lock);
+ spin_unlock(lock);
+}
+EXPORT_SYMBOL(rt_spin_unlock_wait);
+
+int __lockfunc rt_spin_trylock(spinlock_t *lock)
+{
+ int ret;
+
+ migrate_disable();
+ ret = rt_mutex_trylock(&lock->lock);
+ if (ret)
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ else
+ migrate_enable();
+
+ return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock);
+
+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
+{
+ int ret;
+
+ local_bh_disable();
+ ret = rt_mutex_trylock(&lock->lock);
+ if (ret) {
+ migrate_disable();
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ } else
+ local_bh_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock_bh);
+
+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
+{
+ int ret;
+
+ *flags = 0;
+ migrate_disable();
+ ret = rt_mutex_trylock(&lock->lock);
+ if (ret)
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ else
+ migrate_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock_irqsave);
+
+int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
+{
+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+ if (atomic_add_unless(atomic, -1, 1))
+ return 0;
+ migrate_disable();
+ rt_spin_lock(lock);
+ if (atomic_dec_and_test(atomic))
+ return 1;
+ rt_spin_unlock(lock);
+ migrate_enable();
+ return 0;
+}
+EXPORT_SYMBOL(atomic_dec_and_spin_lock);
+
+ void
+__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+}
+EXPORT_SYMBOL(__rt_spin_lock_init);
+
+#endif /* PREEMPT_RT_FULL */
+
+static inline int
+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
+{
+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
+}
+
/*
* Task blocks on lock.
*
@@ -1019,7 +1361,7 @@ static void wakeup_next_waiter(struct rt
* long as we hold lock->wait_lock. The waiter task needs to
* acquire it in order to dequeue the waiter.
*/
- wake_up_process(waiter->task);
+ rt_mutex_wake_waiter(waiter);
}
/*
@@ -1101,11 +1443,11 @@ void rt_mutex_adjust_pi(struct task_stru
return;
}
next_lock = waiter->lock;
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(task);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
next_lock, NULL, task);
}
@@ -1191,9 +1533,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
struct rt_mutex_waiter waiter;
int ret = 0;
- debug_rt_mutex_init_waiter(&waiter);
- RB_CLEAR_NODE(&waiter.pi_tree_entry);
- RB_CLEAR_NODE(&waiter.tree_entry);
+ rt_mutex_init_waiter(&waiter, false);
raw_spin_lock(&lock->wait_lock);
@@ -1535,13 +1875,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
- raw_spin_lock_init(&lock->wait_lock);
lock->waiters = RB_ROOT;
lock->waiters_leftmost = NULL;
debug_rt_mutex_init(lock, name);
}
-EXPORT_SYMBOL_GPL(__rt_mutex_init);
+EXPORT_SYMBOL(__rt_mutex_init);
/**
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
@@ -1556,7 +1895,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
- __rt_mutex_init(lock, NULL);
+ rt_mutex_init(lock);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -49,6 +49,7 @@ struct rt_mutex_waiter {
struct rb_node pi_tree_entry;
struct task_struct *task;
struct rt_mutex *lock;
+ bool savestate;
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
struct pid *deadlock_task_pid;
@@ -141,4 +142,14 @@ extern int rt_mutex_timed_futex_lock(str
# include "rtmutex.h"
#endif
+static inline void
+rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
+{
+ debug_rt_mutex_init_waiter(waiter);
+ waiter->task = NULL;
+ waiter->savestate = savestate;
+ RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ RB_CLEAR_NODE(&waiter->tree_entry);
+}
+
#endif