From 50c77b51735c142605d4621a971453a3705eb233 Mon Sep 17 00:00:00 2001 Message-Id: <50c77b51735c142605d4621a971453a3705eb233.1596234184.git.zanussi@kernel.org> In-Reply-To: <378ee68279f6a7631221f2670a9298620148690d.1596234183.git.zanussi@kernel.org> References: <378ee68279f6a7631221f2670a9298620148690d.1596234183.git.zanussi@kernel.org> From: Sebastian Andrzej Siewior Date: Mon, 24 Jun 2019 19:39:06 +0200 Subject: [PATCH 271/329] timers: Redo the notification of canceling timers on -RT Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.135-rt60.tar.xz [ Upstream commit c71273154c2ad12e13333aada340ff30e826a11b ] Rework of the hrtimer, timer and posix-timer cancelation interface on -RT. Instead of the swait/schedule interface we now have locks which are taken while timer is active. During the cancellation of an active timer the lock is acquired. The lock will then either PI-boost the timer or block and wait until the timer completed. The new code looks simpler and does not trigger a warning from rcu_note_context_switch() anymore like reported by Grygorii Strashko and Daniel Wagner. The patches were contributed by Anna-Maria Gleixner. This is an all in one commit of the following patches: | [PATCH] timers: Introduce expiry spin lock | [PATCH] timers: Drop expiry lock after each timer invocation | [PATCH] hrtimer: Introduce expiry spin lock | [PATCH] posix-timers: move rcu out of union | [PATCH] posix-timers: Add expiry lock Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- fs/timerfd.c | 5 +- include/linux/hrtimer.h | 17 ++---- include/linux/posix-timers.h | 1 + kernel/time/alarmtimer.c | 2 +- kernel/time/hrtimer.c | 36 ++++--------- kernel/time/itimer.c | 2 +- kernel/time/posix-cpu-timers.c | 23 ++++++++ kernel/time/posix-timers.c | 69 ++++++++++-------------- kernel/time/posix-timers.h | 2 + kernel/time/timer.c | 96 ++++++++++++++++------------------ 10 files changed, 118 insertions(+), 135 deletions(-) --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -471,10 +471,11 @@ break; } spin_unlock_irq(&ctx->wqh.lock); + if (isalarm(ctx)) - hrtimer_wait_for_timer(&ctx->t.alarm.timer); + hrtimer_grab_expiry_lock(&ctx->t.alarm.timer); else - hrtimer_wait_for_timer(&ctx->t.tmr); + hrtimer_grab_expiry_lock(&ctx->t.tmr); } /* --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -22,7 +22,6 @@ #include #include #include -#include struct hrtimer_clock_base; struct hrtimer_cpu_base; @@ -193,6 +192,8 @@ * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue; it is the total first expiry time (hard * and soft hrtimer are taken into account) @@ -220,12 +221,10 @@ unsigned short nr_hangs; unsigned int max_hang_time; #endif + spinlock_t softirq_expiry_lock; ktime_t expires_next; struct hrtimer *next_timer; ktime_t softirq_expires_next; -#ifdef CONFIG_PREEMPT_RT_BASE - wait_queue_head_t wait; -#endif struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; @@ -426,6 +425,7 @@ extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); +extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer); static inline void hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) @@ -443,13 +443,6 @@ hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } -/* Softirq preemption could deadlock timer removal */ -#ifdef CONFIG_PREEMPT_RT_BASE - extern void hrtimer_wait_for_timer(const struct hrtimer *timer); -#else -# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) -#endif - /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); @@ -481,7 +474,7 @@ * Helper function to check, whether the timer is running the callback * function */ -static inline int hrtimer_callback_running(const struct hrtimer *timer) +static inline int hrtimer_callback_running(struct hrtimer *timer) { return timer->base->running == timer; } --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -15,6 +15,7 @@ u64 expires, incr; struct task_struct *task; int firing; + int firing_cpu; }; /* --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -438,7 +438,7 @@ int ret = alarm_try_to_cancel(alarm); if (ret >= 0) return ret; - hrtimer_wait_for_timer(&alarm->timer); + hrtimer_grab_expiry_lock(&alarm->timer); } } EXPORT_SYMBOL_GPL(alarm_cancel); --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -963,33 +963,16 @@ } EXPORT_SYMBOL_GPL(hrtimer_forward); -#ifdef CONFIG_PREEMPT_RT_BASE -# define wake_up_timer_waiters(b) wake_up(&(b)->wait) - -/** - * hrtimer_wait_for_timer - Wait for a running timer - * - * @timer: timer to wait for - * - * The function waits in case the timers callback function is - * currently executed on the waitqueue of the timer base. The - * waitqueue is woken up after the timer callback function has - * finished execution. - */ -void hrtimer_wait_for_timer(const struct hrtimer *timer) +void hrtimer_grab_expiry_lock(const struct hrtimer *timer) { struct hrtimer_clock_base *base = timer->base; - if (base && base->cpu_base && - base->index >= HRTIMER_BASE_MONOTONIC_SOFT) - wait_event(base->cpu_base->wait, - !(hrtimer_callback_running(timer))); + if (base && base->cpu_base) { + spin_lock(&base->cpu_base->softirq_expiry_lock); + spin_unlock(&base->cpu_base->softirq_expiry_lock); + } } -#else -# define wake_up_timer_waiters(b) do { } while (0) -#endif - /* * enqueue_hrtimer - internal function to (re)start a timer * @@ -1227,7 +1210,7 @@ if (ret >= 0) return ret; - hrtimer_wait_for_timer(timer); + hrtimer_grab_expiry_lock(timer); } } EXPORT_SYMBOL_GPL(hrtimer_cancel); @@ -1531,6 +1514,7 @@ unsigned long flags; ktime_t now; + spin_lock(&cpu_base->softirq_expiry_lock); raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); @@ -1540,7 +1524,7 @@ hrtimer_update_softirq_timer(cpu_base, true); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); - wake_up_timer_waiters(cpu_base); + spin_unlock(&cpu_base->softirq_expiry_lock); } #ifdef CONFIG_HIGH_RES_TIMERS @@ -1950,9 +1934,7 @@ cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; -#ifdef CONFIG_PREEMPT_RT_BASE - init_waitqueue_head(&cpu_base->wait); -#endif + spin_lock_init(&cpu_base->softirq_expiry_lock); return 0; } --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -215,7 +215,7 @@ /* We are sharing ->siglock with it_real_fn() */ if (hrtimer_try_to_cancel(timer) < 0) { spin_unlock_irq(&tsk->sighand->siglock); - hrtimer_wait_for_timer(&tsk->signal->real_timer); + hrtimer_grab_expiry_lock(timer); goto again; } expires = timeval_to_ktime(value->it_value); --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -792,6 +792,7 @@ return t->expires; t->firing = 1; + t->firing_cpu = smp_processor_id(); list_move_tail(&t->entry, firing); } @@ -1138,6 +1139,20 @@ return 0; } +static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock); + +void cpu_timers_grab_expiry_lock(struct k_itimer *timer) +{ + int cpu = timer->it.cpu.firing_cpu; + + if (cpu >= 0) { + spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu); + + spin_lock_irq(expiry_lock); + spin_unlock_irq(expiry_lock); + } +} + /* * This is called from the timer interrupt handler. The irq handler has * already updated our counts. We need to check if any timers fire now. @@ -1148,6 +1163,7 @@ LIST_HEAD(firing); struct k_itimer *timer, *next; unsigned long flags; + spinlock_t *expiry_lock; /* * The fast path checks that there are no expired thread or thread @@ -1156,6 +1172,9 @@ if (!fastpath_timer_check(tsk)) return; + expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock); + spin_lock(expiry_lock); + if (!lock_task_sighand(tsk, &flags)) return; /* @@ -1190,6 +1209,7 @@ list_del_init(&timer->it.cpu.entry); cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = 0; + timer->it.cpu.firing_cpu = -1; /* * The firing flag is -1 if we collided with a reset * of the timer, which already reported this @@ -1199,6 +1219,7 @@ cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } + spin_unlock(expiry_lock); } #ifdef CONFIG_PREEMPT_RT_BASE @@ -1466,6 +1487,8 @@ spin_unlock_irq(&timer.it_lock); while (error == TIMER_RETRY) { + + cpu_timers_grab_expiry_lock(&timer); /* * We need to handle case when timer was or is in the * middle of firing. In other cases we already freed --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -821,25 +821,20 @@ hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } -/* - * Protected by RCU! - */ -static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr) +static int common_hrtimer_try_to_cancel(struct k_itimer *timr) { -#ifdef CONFIG_PREEMPT_RT_FULL - if (kc->timer_arm == common_hrtimer_arm) - hrtimer_wait_for_timer(&timr->it.real.timer); - else if (kc == &alarm_clock) - hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer); - else - /* FIXME: Whacky hack for posix-cpu-timers */ - schedule_timeout(1); -#endif + return hrtimer_try_to_cancel(&timr->it.real.timer); } -static int common_hrtimer_try_to_cancel(struct k_itimer *timr) +static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timer) { - return hrtimer_try_to_cancel(&timr->it.real.timer); + if (kc->timer_arm == common_hrtimer_arm) + hrtimer_grab_expiry_lock(&timer->it.real.timer); + else if (kc == &alarm_clock) + hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer); + else + /* posix-cpu-timers */ + cpu_timers_grab_expiry_lock(timer); } /* Set a POSIX.1b interval timer. */ @@ -901,21 +896,21 @@ if (!timr) return -EINVAL; - rcu_read_lock(); kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; else error = kc->timer_set(timr, flags, new_spec64, old_spec64); - unlock_timer(timr, flag); if (error == TIMER_RETRY) { + rcu_read_lock(); + unlock_timer(timr, flag); timer_wait_for_callback(kc, timr); - old_spec64 = NULL; // We already got the old time... rcu_read_unlock(); + old_spec64 = NULL; // We already got the old time... goto retry; } - rcu_read_unlock(); + unlock_timer(timr, flag); return error; } @@ -977,13 +972,21 @@ return 0; } -static inline int timer_delete_hook(struct k_itimer *timer) +static int timer_delete_hook(struct k_itimer *timer) { const struct k_clock *kc = timer->kclock; + int ret; if (WARN_ON_ONCE(!kc || !kc->timer_del)) return -EINVAL; - return kc->timer_del(timer); + ret = kc->timer_del(timer); + if (ret == TIMER_RETRY) { + rcu_read_lock(); + spin_unlock_irq(&timer->it_lock); + timer_wait_for_callback(kc, timer); + rcu_read_unlock(); + } + return ret; } /* Delete a POSIX.1b interval timer. */ @@ -997,15 +1000,8 @@ if (!timer) return -EINVAL; - rcu_read_lock(); - if (timer_delete_hook(timer) == TIMER_RETRY) { - unlock_timer(timer, flags); - timer_wait_for_callback(clockid_to_kclock(timer->it_clock), - timer); - rcu_read_unlock(); + if (timer_delete_hook(timer) == TIMER_RETRY) goto retry_delete; - } - rcu_read_unlock(); spin_lock(¤t->sighand->siglock); list_del(&timer->list); @@ -1031,20 +1027,9 @@ retry_delete: spin_lock_irqsave(&timer->it_lock, flags); - /* On RT we can race with a deletion */ - if (!timer->it_signal) { - unlock_timer(timer, flags); - return; - } - - if (timer_delete_hook(timer) == TIMER_RETRY) { - rcu_read_lock(); - unlock_timer(timer, flags); - timer_wait_for_callback(clockid_to_kclock(timer->it_clock), - timer); - rcu_read_unlock(); + if (timer_delete_hook(timer) == TIMER_RETRY) goto retry_delete; - } + list_del(&timer->list); /* * This keeps any tasks waiting on the spin lock from thinking --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -32,6 +32,8 @@ extern const struct k_clock clock_thread; extern const struct k_clock alarm_clock; +extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer); + int posix_timer_event(struct k_itimer *timr, int si_private); void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting); --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include