From 849650b8a4b915fa073d9dd0b07c6b2f706cb11d Mon Sep 17 00:00:00 2001
Message-Id: <849650b8a4b915fa073d9dd0b07c6b2f706cb11d.1590093525.git.zanussi@kernel.org>
In-Reply-To: <4f310e18bbb62e33196484e72d1a7d0416189d63.1590093525.git.zanussi@kernel.org>
References: <4f310e18bbb62e33196484e72d1a7d0416189d63.1590093525.git.zanussi@kernel.org>
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 12 Oct 2017 17:28:34 +0200
Subject: [PATCH 144/327] rtmutex: add rwsem implementation based on rtmutex
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.124-rt53.tar.xz

The RT specific R/W semaphore implementation restricts the number of readers
to one because a writer cannot block on multiple readers and have them all
inherit its priority or budget.

The single reader restriction is painful in various ways:

 - Performance bottleneck for multi-threaded applications in the page fault
   path (mmap_sem)

 - Progress blocker for drivers which are carefully crafted to avoid the
   potential reader/writer deadlock in mainline.

The analysis of the writer code paths shows that properly written RT tasks
should not take them. Syscalls like mmap() and file accesses which take
mmap_sem write-locked have unbounded latencies which are completely unrelated
to mmap_sem itself. Other R/W sem users like graphics drivers are not
suitable for RT tasks either.

So there is little risk of hurting RT tasks when the RT rwsem implementation
is changed in the following way (a user-space sketch of the resulting
counting scheme follows the list below):

 - Allow concurrent readers

 - Make writers block until the last reader has left the critical section.
   This blocking is not subject to priority/budget inheritance.

 - Readers blocked on a writer have their priority/budget inherited by the
   writer in the normal rtmutex way.
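
For illustration, a minimal user-space sketch of the resulting counting
scheme in C11 (not part of the patch: the sketch_* names are invented, a
pthread mutex stands in for the rtmutex, there is no priority inheritance,
and the writer spins where the kernel blocks under m->wait_lock):

#include <stdatomic.h>
#include <pthread.h>
#include <sched.h>

/* Same bias values as the patch. Converting 1U << 31 to int yields INT_MIN
 * on the usual two's-complement targets, and C11 defines the atomic fetch
 * ops on signed integers to wrap silently, much like the kernel's atomic_t. */
#define READER_BIAS (1U << 31)
#define WRITER_BIAS (1U << 30)

struct sketch_rwsem {
	atomic_int	readers;	/* READER_BIAS + nr of active readers */
	pthread_mutex_t	mtx;		/* stands in for sem->rtmutex */
};

#define SKETCH_RWSEM_INIT { (int)READER_BIAS, PTHREAD_MUTEX_INITIALIZER }

static void sketch_down_read(struct sketch_rwsem *s)
{
	for (;;) {
		int r = atomic_load(&s->readers);

		/* Fast path: the bias is set (value negative), no writer. */
		while (r < 0) {
			if (atomic_compare_exchange_weak(&s->readers, &r, r + 1))
				return;
		}
		/*
		 * Slow path. As in the patch, readers are still admitted as
		 * long as the writer has not completely acquired the
		 * semaphore; this branch is what makes the scheme writer
		 * unfair.
		 */
		if (r != (int)WRITER_BIAS) {
			if (atomic_compare_exchange_weak(&s->readers, &r, r + 1))
				return;
			continue;
		}
		/* Write locked: wait until up_write drops the mutex, retry. */
		pthread_mutex_lock(&s->mtx);
		pthread_mutex_unlock(&s->mtx);
	}
}

static void sketch_up_read(struct sketch_rwsem *s)
{
	/* The kernel also wakes the waiting writer when this drains to 0. */
	atomic_fetch_sub(&s->readers, 1);
}

static void sketch_down_write(struct sketch_rwsem *s)
{
	int expected = 0;

	/* 1) Lock the mutex, like taking sem->rtmutex in the patch. */
	pthread_mutex_lock(&s->mtx);
	/* 2) Remove the reader bias to force readers into the slow path. */
	atomic_fetch_sub(&s->readers, (int)READER_BIAS);
	/*
	 * 3) Wait until all readers have left, then 4) atomically mark it
	 *    write locked. The kernel does this under m->wait_lock and
	 *    blocks instead of spinning.
	 */
	while (!atomic_compare_exchange_weak(&s->readers, &expected,
					     (int)WRITER_BIAS)) {
		expected = 0;
		sched_yield();
	}
}

static void sketch_up_write(struct sketch_rwsem *s)
{
	/* Remove the writer marker, restore the bias, release waiters. */
	atomic_fetch_add(&s->readers, (int)(READER_BIAS - WRITER_BIAS));
	pthread_mutex_unlock(&s->mtx);
}

The r != WRITER_BIAS branch mirrors down_read() step 3 of the patch and is
the writer-unfair part: readers keep entering while the writer is still
waiting for the old readers to drain. Only once the writer has completely
acquired the semaphore do readers queue on the mutex, where the kernel
variant additionally gets rtmutex priority/DL inheritance.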

There is a drawback with this scheme: R/W semaphores become writer unfair,
though the applications which have triggered writer starvation (mostly on
mmap_sem) in the past are not really the typical workloads running on an RT
system. So while writer starvation is unlikely, it is possible. If there are
unexpected workloads on RT systems triggering it, we need to rethink the
approach.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/rwsem_rt.h | 68 +++++++++
kernel/locking/rwsem-rt.c | 293 ++++++++++++++++++++++++++++++++++++++
2 files changed, 361 insertions(+)
create mode 100644 include/linux/rwsem_rt.h
 create mode 100644 kernel/locking/rwsem-rt.c

diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
new file mode 100644
index 000000000000..2018ff77904a
--- /dev/null
+++ b/include/linux/rwsem_rt.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_RWSEM_RT_H
+#define _LINUX_RWSEM_RT_H
+
+#ifndef _LINUX_RWSEM_H
+#error "Include rwsem.h"
+#endif
+
+#include <linux/rtmutex.h>
+#include <linux/swait.h>
+
+#define READER_BIAS		(1U << 31)
+#define WRITER_BIAS		(1U << 30)
+
+struct rw_semaphore {
+	atomic_t		readers;
+	struct rt_mutex		rtmutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
+};
+
+#define __RWSEM_INITIALIZER(name)				\
+{								\
+	.readers = ATOMIC_INIT(READER_BIAS),			\
+	.rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex),	\
+	RW_DEP_MAP_INIT(name)					\
+}
+
+#define DECLARE_RWSEM(lockname) \
+	struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
+
+extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+			 struct lock_class_key *key);
+
+#define __init_rwsem(sem, name, key)			\
+do {							\
+	rt_mutex_init(&(sem)->rtmutex);			\
+	__rwsem_init((sem), (name), (key));		\
+} while (0)
+
+#define init_rwsem(sem)					\
+do {							\
+	static struct lock_class_key __key;		\
+							\
+	__init_rwsem((sem), #sem, &__key);		\
+} while (0)
+
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return atomic_read(&sem->readers) != READER_BIAS;
+}
+
+static inline int rwsem_is_contended(struct rw_semaphore *sem)
+{
+	return atomic_read(&sem->readers) > 0;
+}
+
+extern void __down_read(struct rw_semaphore *sem);
+extern int __down_read_killable(struct rw_semaphore *sem);
+extern int __down_read_trylock(struct rw_semaphore *sem);
+extern void __down_write(struct rw_semaphore *sem);
+extern int __must_check __down_write_killable(struct rw_semaphore *sem);
+extern int __down_write_trylock(struct rw_semaphore *sem);
+extern void __up_read(struct rw_semaphore *sem);
+extern void __up_write(struct rw_semaphore *sem);
+extern void __downgrade_write(struct rw_semaphore *sem);
+
+#endif
diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c
new file mode 100644
index 000000000000..7d3c5cf3d23d
--- /dev/null
+++ b/kernel/locking/rwsem-rt.c
@@ -0,0 +1,293 @@
+/*
+ */
+#include <linux/rwsem.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
+#include <linux/export.h>
+
+#include "rtmutex_common.h"
+
+/*
+ * RT-specific reader/writer semaphores
+ *
+ * down_write()
+ *  1) Lock sem->rtmutex
+ *  2) Remove the reader BIAS to force readers into the slow path
+ *  3) Wait until all readers have left the critical region
+ *  4) Mark it write locked
+ *
+ * up_write()
+ *  1) Remove the write locked marker
+ *  2) Set the reader BIAS so readers can use the fast path again
+ *  3) Unlock sem->rtmutex to release blocked readers
+ *
+ * down_read()
+ *  1) Try fast path acquisition (reader BIAS is set)
+ *  2) Take sem->rtmutex.wait_lock which protects the writelocked flag
+ *  3) If !writelocked, acquire it for read
+ *  4) If writelocked, block on sem->rtmutex
+ *  5) unlock sem->rtmutex, goto 1)
+ *
+ * up_read()
+ *  1) Try fast path release (reader count != 1)
+ *  2) Wake the writer waiting in down_write()#3
+ *
+ * down_read()#3 has the consequence that rw semaphores on RT are not writer
+ * fair, but writers, which should be avoided in RT tasks (think mmap_sem),
+ * are subject to the rtmutex priority/DL inheritance mechanism.
+ *
+ * It's possible to make the rw semaphores writer fair by keeping a list of
+ * active readers. A blocked writer would force all newly incoming readers to
+ * block on the rtmutex, but the rtmutex would have to be proxy locked for one
+ * reader after the other. We can't use multi-reader inheritance because there
+ * is no way to support that with SCHED_DEADLINE. Implementing the one by one
+ * reader boosting/handover mechanism is major surgery for a very dubious
+ * value.
+ *
+ * The risk of writer starvation is there, but the pathological use cases
+ * which trigger it are not necessarily the typical RT workloads.
+ */
+
+void __rwsem_init(struct rw_semaphore *sem, const char *name,
+		  struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held semaphore:
+	 */
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
+	atomic_set(&sem->readers, READER_BIAS);
+}
+EXPORT_SYMBOL(__rwsem_init);
+
+int __down_read_trylock(struct rw_semaphore *sem)
+{
+	int r, old;
+
+	/*
+	 * Increment the reader count if sem->readers < 0, i.e. READER_BIAS
+	 * is set.
+	 */
+	for (r = atomic_read(&sem->readers); r < 0;) {
+		old = atomic_cmpxchg(&sem->readers, r, r + 1);
+		if (likely(old == r))
+			return 1;
+		r = old;
+	}
+	return 0;
+}
+
+static int __sched __down_read_common(struct rw_semaphore *sem, int state)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+	struct rt_mutex_waiter waiter;
+	int ret;
+
+	if (__down_read_trylock(sem))
+		return 0;
+
+	might_sleep();
+	raw_spin_lock_irq(&m->wait_lock);
+	/*
+	 * Allow readers as long as the writer has not completely
+	 * acquired the semaphore for write.
+	 */
+	if (atomic_read(&sem->readers) != WRITER_BIAS) {
+		atomic_inc(&sem->readers);
+		raw_spin_unlock_irq(&m->wait_lock);
+		return 0;
+	}
+
+	/*
+	 * Call into the slow lock path with the rtmutex->wait_lock
+	 * held, so this can't result in the following race:
+	 *
+	 * Reader1	Reader2		Writer
+	 *			down_read()
+	 *					down_write()
+	 *					rtmutex_lock(m)
+	 *					swait()
+	 * down_read()
+	 * unlock(m->wait_lock)
+	 *			up_read()
+	 *			swake()
+	 *					lock(m->wait_lock)
+	 *					sem->writelocked=true
+	 *					unlock(m->wait_lock)
+	 *
+	 *					up_write()
+	 *					sem->writelocked=false
+	 *					rtmutex_unlock(m)
+	 *			down_read()
+	 *					down_write()
+	 *					rtmutex_lock(m)
+	 *					swait()
+	 * rtmutex_lock(m)
+	 *
+	 * That would put Reader1 behind the writer waiting on
+	 * Reader2 to call up_read(), which might take unbounded time.
+	 */
+	rt_mutex_init_waiter(&waiter, false);
+	ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK,
+				       &waiter);
+	/*
+	 * The slowlock() above is guaranteed to return with the rtmutex held
+	 * (for ret = 0), so there can't be an active writer. Increment the
+	 * reader count and immediately drop the rtmutex again. For ret != 0
+	 * we don't hold the rtmutex and need to unlock the wait_lock; we
+	 * don't own the lock in that case.
+	 */
+	if (!ret)
+		atomic_inc(&sem->readers);
+	raw_spin_unlock_irq(&m->wait_lock);
+	if (!ret)
+		__rt_mutex_unlock(m);
+
+	debug_rt_mutex_free_waiter(&waiter);
+	return ret;
+}
+
+void __down_read(struct rw_semaphore *sem)
+{
+	int ret;
+
+	ret = __down_read_common(sem, TASK_UNINTERRUPTIBLE);
+	WARN_ON_ONCE(ret);
+}
+
+int __down_read_killable(struct rw_semaphore *sem)
+{
+	int ret;
+
+	ret = __down_read_common(sem, TASK_KILLABLE);
+	if (likely(!ret))
+		return ret;
+	WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret);
+	return -EINTR;
+}
+
+void __up_read(struct rw_semaphore *sem)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+	struct task_struct *tsk;
+
+	/*
+	 * sem->readers can only hit 0 when a writer is waiting for the
+	 * active readers to leave the critical region.
+	 */
+	if (!atomic_dec_and_test(&sem->readers))
+		return;
+
+	might_sleep();
+	raw_spin_lock_irq(&m->wait_lock);
+	/*
+	 * Wake the writer, i.e. the rtmutex owner. It might release the
+	 * rtmutex concurrently in the fast path (due to a signal), but to
+	 * clean up the rwsem it needs to acquire m->wait_lock. The worst
+	 * case which can happen is a spurious wakeup.
+	 */
+	tsk = rt_mutex_owner(m);
+	if (tsk)
+		wake_up_process(tsk);
+
+	raw_spin_unlock_irq(&m->wait_lock);
+}
+
+static void __up_write_unlock(struct rw_semaphore *sem, int bias,
+			      unsigned long flags)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+
+	atomic_add(READER_BIAS - bias, &sem->readers);
+	raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+	__rt_mutex_unlock(m);
+}
+
+static int __sched __down_write_common(struct rw_semaphore *sem, int state)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+	unsigned long flags;
+
+	/* Take the rtmutex as a first step */
+	if (__rt_mutex_lock_state(m, state))
+		return -EINTR;
+
+	/* Force readers into slow path */
+	atomic_sub(READER_BIAS, &sem->readers);
+	might_sleep();
+
+	set_current_state(state);
+	for (;;) {
+		raw_spin_lock_irqsave(&m->wait_lock, flags);
+		/* Have all readers left the critical region? */
+		if (!atomic_read(&sem->readers)) {
+			atomic_set(&sem->readers, WRITER_BIAS);
+			__set_current_state(TASK_RUNNING);
+			raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+			return 0;
+		}
+
+		if (signal_pending_state(state, current)) {
+			__set_current_state(TASK_RUNNING);
+			__up_write_unlock(sem, 0, flags);
+			return -EINTR;
+		}
+		raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+
+		if (atomic_read(&sem->readers) != 0) {
+			schedule();
+			set_current_state(state);
+		}
+	}
+}
+
+void __sched __down_write(struct rw_semaphore *sem)
+{
+	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+	return __down_write_common(sem, TASK_KILLABLE);
+}
+
+int __down_write_trylock(struct rw_semaphore *sem)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+	unsigned long flags;
+
+	if (!__rt_mutex_trylock(m))
+		return 0;
+
+	atomic_sub(READER_BIAS, &sem->readers);
+
+	raw_spin_lock_irqsave(&m->wait_lock, flags);
+	if (!atomic_read(&sem->readers)) {
+		atomic_set(&sem->readers, WRITER_BIAS);
+		raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+		return 1;
+	}
+	__up_write_unlock(sem, 0, flags);
+	return 0;
+}
+
+void __up_write(struct rw_semaphore *sem)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&m->wait_lock, flags);
+	__up_write_unlock(sem, WRITER_BIAS, flags);
+}
+
+void __downgrade_write(struct rw_semaphore *sem)
+{
+	struct rt_mutex *m = &sem->rtmutex;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&m->wait_lock, flags);
+	/* Release it and account current as reader */
+	__up_write_unlock(sem, WRITER_BIAS - 1, flags);
+}
--
2.17.1