[rt] Update to 4.18-rc8-rt1 and re-enable

This commit is contained in:
Ben Hutchings 2018-08-27 15:32:32 +01:00
parent e2fa147f31
commit 9d3878242c
261 changed files with 32585 additions and 1 deletions

1
debian/changelog vendored
View File

@ -2,6 +2,7 @@ linux (4.18.5-1) UNRELEASED; urgency=medium
* Set ABI to 1
* [x86,arm64] Disable code signing for upload to unstable
* [rt] Update to 4.18-rc8-rt1 and re-enable
-- Ben Hutchings <ben@decadent.org.uk> Mon, 27 Aug 2018 14:22:41 +0100

View File

@ -121,7 +121,7 @@ debug-info: true
signed-code: false
[featureset-rt_base]
enabled: false
enabled: true
[description]
part-long-up: This kernel is not suitable for SMP (multi-processor,

View File

@ -0,0 +1,236 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:38 +0200
Subject: [PATCH 1/6] ARM: at91: add TCB registers definitions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Add registers and bits definitions for the timer counter blocks found on
Atmel ARM SoCs.
Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/soc/at91/atmel_tcb.h | 216 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 216 insertions(+)
create mode 100644 include/soc/at91/atmel_tcb.h
--- /dev/null
+++ b/include/soc/at91/atmel_tcb.h
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018 Microchip */
+
+#ifndef __SOC_ATMEL_TCB_H
+#define __SOC_ATMEL_TCB_H
+
+/* Channel registers */
+#define ATMEL_TC_COFFS(c) ((c) * 0x40)
+#define ATMEL_TC_CCR(c) ATMEL_TC_COFFS(c)
+#define ATMEL_TC_CMR(c) (ATMEL_TC_COFFS(c) + 0x4)
+#define ATMEL_TC_SMMR(c) (ATMEL_TC_COFFS(c) + 0x8)
+#define ATMEL_TC_RAB(c) (ATMEL_TC_COFFS(c) + 0xc)
+#define ATMEL_TC_CV(c) (ATMEL_TC_COFFS(c) + 0x10)
+#define ATMEL_TC_RA(c) (ATMEL_TC_COFFS(c) + 0x14)
+#define ATMEL_TC_RB(c) (ATMEL_TC_COFFS(c) + 0x18)
+#define ATMEL_TC_RC(c) (ATMEL_TC_COFFS(c) + 0x1c)
+#define ATMEL_TC_SR(c) (ATMEL_TC_COFFS(c) + 0x20)
+#define ATMEL_TC_IER(c) (ATMEL_TC_COFFS(c) + 0x24)
+#define ATMEL_TC_IDR(c) (ATMEL_TC_COFFS(c) + 0x28)
+#define ATMEL_TC_IMR(c) (ATMEL_TC_COFFS(c) + 0x2c)
+#define ATMEL_TC_EMR(c) (ATMEL_TC_COFFS(c) + 0x30)
+
+/* Block registers */
+#define ATMEL_TC_BCR 0xc0
+#define ATMEL_TC_BMR 0xc4
+#define ATMEL_TC_QIER 0xc8
+#define ATMEL_TC_QIDR 0xcc
+#define ATMEL_TC_QIMR 0xd0
+#define ATMEL_TC_QISR 0xd4
+#define ATMEL_TC_FMR 0xd8
+#define ATMEL_TC_WPMR 0xe4
+
+/* CCR fields */
+#define ATMEL_TC_CCR_CLKEN BIT(0)
+#define ATMEL_TC_CCR_CLKDIS BIT(1)
+#define ATMEL_TC_CCR_SWTRG BIT(2)
+
+/* Common CMR fields */
+#define ATMEL_TC_CMR_TCLKS_MSK GENMASK(2, 0)
+#define ATMEL_TC_CMR_TCLK(x) (x)
+#define ATMEL_TC_CMR_XC(x) ((x) + 5)
+#define ATMEL_TC_CMR_CLKI BIT(3)
+#define ATMEL_TC_CMR_BURST_MSK GENMASK(5, 4)
+#define ATMEL_TC_CMR_BURST_XC(x) (((x) + 1) << 4)
+#define ATMEL_TC_CMR_WAVE BIT(15)
+
+/* Capture mode CMR fields */
+#define ATMEL_TC_CMR_LDBSTOP BIT(6)
+#define ATMEL_TC_CMR_LDBDIS BIT(7)
+#define ATMEL_TC_CMR_ETRGEDG_MSK GENMASK(9, 8)
+#define ATMEL_TC_CMR_ETRGEDG_NONE (0 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_RISING (1 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_FALLING (2 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_BOTH (3 << 8)
+#define ATMEL_TC_CMR_ABETRG BIT(10)
+#define ATMEL_TC_CMR_CPCTRG BIT(14)
+#define ATMEL_TC_CMR_LDRA_MSK GENMASK(17, 16)
+#define ATMEL_TC_CMR_LDRA_NONE (0 << 16)
+#define ATMEL_TC_CMR_LDRA_RISING (1 << 16)
+#define ATMEL_TC_CMR_LDRA_FALLING (2 << 16)
+#define ATMEL_TC_CMR_LDRA_BOTH (3 << 16)
+#define ATMEL_TC_CMR_LDRB_MSK GENMASK(19, 18)
+#define ATMEL_TC_CMR_LDRB_NONE (0 << 18)
+#define ATMEL_TC_CMR_LDRB_RISING (1 << 18)
+#define ATMEL_TC_CMR_LDRB_FALLING (2 << 18)
+#define ATMEL_TC_CMR_LDRB_BOTH (3 << 18)
+#define ATMEL_TC_CMR_SBSMPLR_MSK GENMASK(22, 20)
+#define ATMEL_TC_CMR_SBSMPLR(x) ((x) << 20)
+
+/* Waveform mode CMR fields */
+#define ATMEL_TC_CMR_CPCSTOP BIT(6)
+#define ATMEL_TC_CMR_CPCDIS BIT(7)
+#define ATMEL_TC_CMR_EEVTEDG_MSK GENMASK(9, 8)
+#define ATMEL_TC_CMR_EEVTEDG_NONE (0 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_RISING (1 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_FALLING (2 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_BOTH (3 << 8)
+#define ATMEL_TC_CMR_EEVT_MSK GENMASK(11, 10)
+#define ATMEL_TC_CMR_EEVT_XC(x) (((x) + 1) << 10)
+#define ATMEL_TC_CMR_ENETRG BIT(12)
+#define ATMEL_TC_CMR_WAVESEL_MSK GENMASK(14, 13)
+#define ATMEL_TC_CMR_WAVESEL_UP (0 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPDOWN (1 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPRC (2 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPDOWNRC (3 << 13)
+#define ATMEL_TC_CMR_ACPA_MSK GENMASK(17, 16)
+#define ATMEL_TC_CMR_ACPA(a) (ATMEL_TC_CMR_ACTION_##a << 16)
+#define ATMEL_TC_CMR_ACPC_MSK GENMASK(19, 18)
+#define ATMEL_TC_CMR_ACPC(a) (ATMEL_TC_CMR_ACTION_##a << 18)
+#define ATMEL_TC_CMR_AEEVT_MSK GENMASK(21, 20)
+#define ATMEL_TC_CMR_AEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 20)
+#define ATMEL_TC_CMR_ASWTRG_MSK GENMASK(23, 22)
+#define ATMEL_TC_CMR_ASWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 22)
+#define ATMEL_TC_CMR_BCPB_MSK GENMASK(25, 24)
+#define ATMEL_TC_CMR_BCPB(a) (ATMEL_TC_CMR_ACTION_##a << 24)
+#define ATMEL_TC_CMR_BCPC_MSK GENMASK(27, 26)
+#define ATMEL_TC_CMR_BCPC(a) (ATMEL_TC_CMR_ACTION_##a << 26)
+#define ATMEL_TC_CMR_BEEVT_MSK GENMASK(29, 28)
+#define ATMEL_TC_CMR_BEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 28)
+#define ATMEL_TC_CMR_BSWTRG_MSK GENMASK(31, 30)
+#define ATMEL_TC_CMR_BSWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 30)
+#define ATMEL_TC_CMR_ACTION_NONE 0
+#define ATMEL_TC_CMR_ACTION_SET 1
+#define ATMEL_TC_CMR_ACTION_CLEAR 2
+#define ATMEL_TC_CMR_ACTION_TOGGLE 3
+
+/* SMMR fields */
+#define ATMEL_TC_SMMR_GCEN BIT(0)
+#define ATMEL_TC_SMMR_DOWN BIT(1)
+
+/* SR/IER/IDR/IMR fields */
+#define ATMEL_TC_COVFS BIT(0)
+#define ATMEL_TC_LOVRS BIT(1)
+#define ATMEL_TC_CPAS BIT(2)
+#define ATMEL_TC_CPBS BIT(3)
+#define ATMEL_TC_CPCS BIT(4)
+#define ATMEL_TC_LDRAS BIT(5)
+#define ATMEL_TC_LDRBS BIT(6)
+#define ATMEL_TC_ETRGS BIT(7)
+#define ATMEL_TC_CLKSTA BIT(16)
+#define ATMEL_TC_MTIOA BIT(17)
+#define ATMEL_TC_MTIOB BIT(18)
+
+/* EMR fields */
+#define ATMEL_TC_EMR_TRIGSRCA_MSK GENMASK(1, 0)
+#define ATMEL_TC_EMR_TRIGSRCA_TIOA 0
+#define ATMEL_TC_EMR_TRIGSRCA_PWMX 1
+#define ATMEL_TC_EMR_TRIGSRCB_MSK GENMASK(5, 4)
+#define ATMEL_TC_EMR_TRIGSRCB_TIOB (0 << 4)
+#define ATMEL_TC_EMR_TRIGSRCB_PWM (1 << 4)
+#define ATMEL_TC_EMR_NOCLKDIV BIT(8)
+
+/* BCR fields */
+#define ATMEL_TC_BCR_SYNC BIT(0)
+
+/* BMR fields */
+#define ATMEL_TC_BMR_TCXC_MSK(c) GENMASK(((c) * 2) + 1, (c) * 2)
+#define ATMEL_TC_BMR_TCXC(x, c) ((x) << (2 * (c)))
+#define ATMEL_TC_BMR_QDEN BIT(8)
+#define ATMEL_TC_BMR_POSEN BIT(9)
+#define ATMEL_TC_BMR_SPEEDEN BIT(10)
+#define ATMEL_TC_BMR_QDTRANS BIT(11)
+#define ATMEL_TC_BMR_EDGPHA BIT(12)
+#define ATMEL_TC_BMR_INVA BIT(13)
+#define ATMEL_TC_BMR_INVB BIT(14)
+#define ATMEL_TC_BMR_INVIDX BIT(15)
+#define ATMEL_TC_BMR_SWAP BIT(16)
+#define ATMEL_TC_BMR_IDXPHB BIT(17)
+#define ATMEL_TC_BMR_AUTOC BIT(18)
+#define ATMEL_TC_MAXFILT_MSK GENMASK(25, 20)
+#define ATMEL_TC_MAXFILT(x) (((x) - 1) << 20)
+#define ATMEL_TC_MAXCMP_MSK GENMASK(29, 26)
+#define ATMEL_TC_MAXCMP(x) ((x) << 26)
+
+/* QEDC fields */
+#define ATMEL_TC_QEDC_IDX BIT(0)
+#define ATMEL_TC_QEDC_DIRCHG BIT(1)
+#define ATMEL_TC_QEDC_QERR BIT(2)
+#define ATMEL_TC_QEDC_MPE BIT(3)
+#define ATMEL_TC_QEDC_DIR BIT(8)
+
+/* FMR fields */
+#define ATMEL_TC_FMR_ENCF(x) BIT(x)
+
+/* WPMR fields */
+#define ATMEL_TC_WPMR_WPKEY (0x54494d << 8)
+#define ATMEL_TC_WPMR_WPEN BIT(0)
+
+static inline struct clk *tcb_clk_get(struct device_node *node, int channel)
+{
+ struct clk *clk;
+ char clk_name[] = "t0_clk";
+
+ clk_name[1] += channel;
+ clk = of_clk_get_by_name(node->parent, clk_name);
+ if (!IS_ERR(clk))
+ return clk;
+
+ return of_clk_get_by_name(node->parent, "t0_clk");
+}
+
+static inline int tcb_irq_get(struct device_node *node, int channel)
+{
+ int irq;
+
+ irq = of_irq_get(node->parent, channel);
+ if (irq > 0)
+ return irq;
+
+ return of_irq_get(node->parent, 0);
+}
+
+static const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+
+struct atmel_tcb_info {
+ int bits;
+};
+
+static const struct atmel_tcb_info atmel_tcb_infos[] = {
+ { .bits = 16 },
+ { .bits = 32 },
+};
+
+static const struct of_device_id atmel_tcb_dt_ids[] = {
+ {
+ .compatible = "atmel,at91rm9200-tcb",
+ .data = &atmel_tcb_infos[0],
+ }, {
+ .compatible = "atmel,at91sam9x5-tcb",
+ .data = &atmel_tcb_infos[1],
+ }, {
+ /* sentinel */
+ }
+};
+
+#endif /* __SOC_ATMEL_TCB_H */

View File

@ -0,0 +1,167 @@
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 May 2018 15:24:20 +0200
Subject: [PATCH 1/4] Split IRQ-off and zone->lock while freeing pages from PCP
list #1
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Split the IRQ-off section while accessing the PCP list from zone->lock
while freeing pages.
Introcude isolate_pcp_pages() which separates the pages from the PCP
list onto a temporary list and then free the temporary list via
free_pcppages_bulk().
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/page_alloc.c | 82 +++++++++++++++++++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 30 deletions(-)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1067,7 +1067,7 @@ static inline void prefetch_buddy(struct
}
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -1078,14 +1078,41 @@ static inline void prefetch_buddy(struct
* pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+ struct list_head *head)
+{
+ bool isolated_pageblocks;
+ struct page *page, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ isolated_pageblocks = has_isolate_pageblock(zone);
+
+ /*
+ * Use safe version since after __free_one_page(),
+ * page->lru.next will not point to original list.
+ */
+ list_for_each_entry_safe(page, tmp, head, lru) {
+ int mt = get_pcppage_migratetype(page);
+ /* MIGRATE_ISOLATE page should not go to pcplists */
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+ /* Pageblock could have been isolated meanwhile */
+ if (unlikely(isolated_pageblocks))
+ mt = get_pageblock_migratetype(page);
+
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt);
+ trace_mm_page_pcpu_drain(page, 0, mt);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
+ struct list_head *dst)
+
{
int migratetype = 0;
int batch_free = 0;
int prefetch_nr = 0;
- bool isolated_pageblocks;
- struct page *page, *tmp;
- LIST_HEAD(head);
+ struct page *page;
while (count) {
struct list_head *list;
@@ -1117,7 +1144,7 @@ static void free_pcppages_bulk(struct zo
if (bulkfree_pcp_prepare(page))
continue;
- list_add_tail(&page->lru, &head);
+ list_add_tail(&page->lru, dst);
/*
* We are going to put the page back to the global
@@ -1132,26 +1159,6 @@ static void free_pcppages_bulk(struct zo
prefetch_buddy(page);
} while (--count && --batch_free && !list_empty(list));
}
-
- spin_lock(&zone->lock);
- isolated_pageblocks = has_isolate_pageblock(zone);
-
- /*
- * Use safe version since after __free_one_page(),
- * page->lru.next will not point to original list.
- */
- list_for_each_entry_safe(page, tmp, &head, lru) {
- int mt = get_pcppage_migratetype(page);
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
- __free_one_page(page, page_to_pfn(page), zone, 0, mt);
- trace_mm_page_pcpu_drain(page, 0, mt);
- }
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
@@ -2515,13 +2522,18 @@ void drain_zone_pages(struct zone *zone,
{
unsigned long flags;
int to_drain, batch;
+ LIST_HEAD(dst);
local_irq_save(flags);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0)
- free_pcppages_bulk(zone, to_drain, pcp);
+ isolate_pcp_pages(to_drain, pcp, &dst);
+
local_irq_restore(flags);
+
+ if (to_drain > 0)
+ free_pcppages_bulk(zone, to_drain, &dst);
}
#endif
@@ -2537,14 +2549,21 @@ static void drain_pages_zone(unsigned in
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
local_irq_save(flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count)
- free_pcppages_bulk(zone, pcp->count, pcp);
+ count = pcp->count;
+ if (count)
+ isolate_pcp_pages(count, pcp, &dst);
+
local_irq_restore(flags);
+
+ if (count)
+ free_pcppages_bulk(zone, count, &dst);
}
/*
@@ -2766,7 +2785,10 @@ static void free_unref_page_commit(struc
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
- free_pcppages_bulk(zone, batch, pcp);
+ LIST_HEAD(dst);
+
+ isolate_pcp_pages(batch, pcp, &dst);
+ free_pcppages_bulk(zone, batch, &dst);
}
}

View File

@ -0,0 +1,103 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 7 May 2018 16:51:09 +0200
Subject: [PATCH] bdi: use refcount_t for reference counting instead atomic_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
refcount_t type and corresponding API should be used instead of atomic_t when
the variable is used as a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free situations.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/backing-dev-defs.h | 3 ++-
include/linux/backing-dev.h | 4 ++--
mm/backing-dev.c | 12 ++++++------
3 files changed, 10 insertions(+), 9 deletions(-)
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/refcount.h>
struct page;
struct device;
@@ -75,7 +76,7 @@ enum wb_reason {
*/
struct bdi_writeback_congested {
unsigned long state; /* WB_[a]sync_congested flags */
- atomic_t refcnt; /* nr of attached wb's and blkg */
+ refcount_t refcnt; /* nr of attached wb's and blkg */
#ifdef CONFIG_CGROUP_WRITEBACK
struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -404,13 +404,13 @@ static inline bool inode_cgwb_enabled(st
static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
- atomic_inc(&bdi->wb_congested->refcnt);
+ refcount_inc(&bdi->wb_congested->refcnt);
return bdi->wb_congested;
}
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
- if (atomic_dec_and_test(&congested->refcnt))
+ if (refcount_dec_and_test(&congested->refcnt))
kfree(congested);
}
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -438,10 +438,10 @@ wb_congested_get_create(struct backing_d
if (new_congested) {
/* !found and storage for new one already allocated, insert */
congested = new_congested;
- new_congested = NULL;
rb_link_node(&congested->rb_node, parent, node);
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
- goto found;
+ spin_unlock_irqrestore(&cgwb_lock, flags);
+ return congested;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -451,13 +451,13 @@ wb_congested_get_create(struct backing_d
if (!new_congested)
return NULL;
- atomic_set(&new_congested->refcnt, 0);
+ refcount_set(&new_congested->refcnt, 1);
new_congested->__bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;
found:
- atomic_inc(&congested->refcnt);
+ refcount_inc(&congested->refcnt);
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(new_congested);
return congested;
@@ -474,7 +474,7 @@ void wb_congested_put(struct bdi_writeba
unsigned long flags;
local_irq_save(flags);
- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
+ if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
local_irq_restore(flags);
return;
}
@@ -804,7 +804,7 @@ static int cgwb_bdi_init(struct backing_
if (!bdi->wb_congested)
return -ENOMEM;
- atomic_set(&bdi->wb_congested->refcnt, 1);
+ refcount_set(&bdi->wb_congested->refcnt, 1);
err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {

View File

@ -0,0 +1,31 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Jul 2018 12:56:19 +0200
Subject: [PATCH 1/4] mm/list_lru: use list_lru_walk_one() in
list_lru_walk_node()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
list_lru_walk_node() invokes __list_lru_walk_one() with -1 as the
memcg_idx parameter. The same can be achieved by list_lru_walk_one() and
passing NULL as memcg argument which then gets converted into -1. This
is a preparation step when the spin_lock() function is lifted to the
caller of __list_lru_walk_one().
Invoke list_lru_walk_one() instead __list_lru_walk_one() when possible.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/list_lru.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -272,8 +272,8 @@ unsigned long list_lru_walk_node(struct
long isolated = 0;
int memcg_idx;
- isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
- nr_to_walk);
+ isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
+ nr_to_walk);
if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
for_each_memcg_cache_index(memcg_idx) {
isolated += __list_lru_walk_one(lru, nid, memcg_idx,

View File

@ -0,0 +1,46 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 22 Jun 2018 10:48:51 +0200
Subject: [PATCH 1/3] mm: workingset: remove local_irq_disable() from
count_shadow_nodes()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
In commit 0c7c1bed7e13 ("mm: make counting of list_lru_one::nr_items
lockless") the
spin_lock(&nlru->lock);
statement was replaced with
rcu_read_lock();
in __list_lru_count_one(). The comment in count_shadow_nodes() says that
the local_irq_disable() is required because the lock must be acquired
with disabled interrupts and (spin_lock()) does not do so.
Since the lock is replaced with rcu_read_lock() the local_irq_disable()
is no longer needed. The code path is
list_lru_shrink_count()
-> list_lru_count_one()
-> __list_lru_count_one()
-> rcu_read_lock()
-> list_lru_from_memcg_idx()
-> rcu_read_unlock()
Remove the local_irq_disable() statement.
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/workingset.c | 3 ---
1 file changed, 3 deletions(-)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -366,10 +366,7 @@ static unsigned long count_shadow_nodes(
unsigned long nodes;
unsigned long cache;
- /* list_lru lock nests inside the IRQ-safe i_pages lock */
- local_irq_disable();
nodes = list_lru_shrink_count(&shadow_nodes, sc);
- local_irq_enable();
/*
* Approximate a reasonable limit for the radix tree nodes

View File

@ -0,0 +1,166 @@
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 May 2018 15:24:21 +0200
Subject: [PATCH 2/4] Split IRQ-off and zone->lock while freeing pages from PCP
list #2
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Split the IRQ-off section while accessing the PCP list from zone->lock
while freeing pages.
Introcude isolate_pcp_pages() which separates the pages from the PCP
list onto a temporary list and then free the temporary list via
free_pcppages_bulk().
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/page_alloc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 50 insertions(+), 10 deletions(-)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1077,8 +1077,8 @@ static inline void prefetch_buddy(struct
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
-static void free_pcppages_bulk(struct zone *zone, int count,
- struct list_head *head)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+ bool zone_retry)
{
bool isolated_pageblocks;
struct page *page, *tmp;
@@ -1093,12 +1093,27 @@ static void free_pcppages_bulk(struct zo
*/
list_for_each_entry_safe(page, tmp, head, lru) {
int mt = get_pcppage_migratetype(page);
+
+ if (page_zone(page) != zone) {
+ /*
+ * free_unref_page_list() sorts pages by zone. If we end
+ * up with pages from a different NUMA nodes belonging
+ * to the same ZONE index then we need to redo with the
+ * correct ZONE pointer. Skip the page for now, redo it
+ * on the next iteration.
+ */
+ WARN_ON_ONCE(zone_retry == false);
+ if (zone_retry)
+ continue;
+ }
+
/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
/* Pageblock could have been isolated meanwhile */
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
+ list_del(&page->lru);
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
}
@@ -2533,7 +2548,7 @@ void drain_zone_pages(struct zone *zone,
local_irq_restore(flags);
if (to_drain > 0)
- free_pcppages_bulk(zone, to_drain, &dst);
+ free_pcppages_bulk(zone, &dst, false);
}
#endif
@@ -2563,7 +2578,7 @@ static void drain_pages_zone(unsigned in
local_irq_restore(flags);
if (count)
- free_pcppages_bulk(zone, count, &dst);
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2756,7 +2771,8 @@ static bool free_unref_page_prepare(stru
return true;
}
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+ struct list_head *dst)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
@@ -2785,10 +2801,8 @@ static void free_unref_page_commit(struc
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
- LIST_HEAD(dst);
- isolate_pcp_pages(batch, pcp, &dst);
- free_pcppages_bulk(zone, batch, &dst);
+ isolate_pcp_pages(batch, pcp, dst);
}
}
@@ -2799,13 +2813,17 @@ void free_unref_page(struct page *page)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
+ struct zone *zone = page_zone(page);
+ LIST_HEAD(dst);
if (!free_unref_page_prepare(page, pfn))
return;
local_irq_save(flags);
- free_unref_page_commit(page, pfn);
+ free_unref_page_commit(page, pfn, &dst);
local_irq_restore(flags);
+ if (!list_empty(&dst))
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2816,6 +2834,11 @@ void free_unref_page_list(struct list_he
struct page *page, *next;
unsigned long flags, pfn;
int batch_count = 0;
+ struct list_head dsts[__MAX_NR_ZONES];
+ int i;
+
+ for (i = 0; i < __MAX_NR_ZONES; i++)
+ INIT_LIST_HEAD(&dsts[i]);
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
@@ -2828,10 +2851,12 @@ void free_unref_page_list(struct list_he
local_irq_save(flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
+ enum zone_type type;
set_page_private(page, 0);
trace_mm_page_free_batched(page);
- free_unref_page_commit(page, pfn);
+ type = page_zonenum(page);
+ free_unref_page_commit(page, pfn, &dsts[type]);
/*
* Guard against excessive IRQ disabled times when we get
@@ -2844,6 +2869,21 @@ void free_unref_page_list(struct list_he
}
}
local_irq_restore(flags);
+
+ for (i = 0; i < __MAX_NR_ZONES; ) {
+ struct page *page;
+ struct zone *zone;
+
+ if (list_empty(&dsts[i])) {
+ i++;
+ continue;
+ }
+
+ page = list_first_entry(&dsts[i], struct page, lru);
+ zone = page_zone(page);
+
+ free_pcppages_bulk(zone, &dsts[i], true);
+ }
}
/*

View File

@ -0,0 +1,672 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:39 +0200
Subject: [PATCH 2/6] clocksource/drivers: Add a new driver for the Atmel ARM
TC blocks
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Add a driver for the Atmel Timer Counter Blocks. This driver provides a
clocksource and two clockevent devices.
One of the clockevent device is linked to the clocksource counter and so it
will run at the same frequency. This will be used when there is only on TCB
channel available for timers.
The other clockevent device runs on a separate TCB channel when available.
This driver uses regmap and syscon to be able to probe early in the boot
and avoid having to switch on the TCB clocksource later. Using regmap also
means that unused TCB channels may be used by other drivers (PWM for
example). read/writel are still used to access channel specific registers
to avoid the performance impact of regmap (mainly locking).
Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 8
drivers/clocksource/Makefile | 3
drivers/clocksource/timer-atmel-tcb.c | 608 ++++++++++++++++++++++++++++++++++
3 files changed, 618 insertions(+), 1 deletion(-)
create mode 100644 drivers/clocksource/timer-atmel-tcb.c
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -403,6 +403,14 @@ config ATMEL_ST
help
Support for the Atmel ST timer.
+config ATMEL_ARM_TCB_CLKSRC
+ bool "Microchip ARM TC Block" if COMPILE_TEST
+ select REGMAP_MMIO
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated Timer Counter Blocks in Microchip ARM SoCs.
+
config CLKSRC_EXYNOS_MCT
bool "Exynos multi core timer driver" if COMPILE_TEST
depends on ARM || ARM64
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o
obj-$(CONFIG_TIMER_PROBE) += timer-probe.o
obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o
obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o
-obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
--- /dev/null
+++ b/drivers/clocksource/timer-atmel-tcb.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+#include <linux/sched_clock.h>
+#include <soc/at91/atmel_tcb.h>
+
+static struct atmel_tcb_clksrc {
+ struct clocksource clksrc;
+ struct clock_event_device clkevt;
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *clk[2];
+ char name[20];
+ int channels[2];
+ int bits;
+ int irq;
+ struct {
+ u32 cmr;
+ u32 imr;
+ u32 rc;
+ bool clken;
+ } cache[2];
+ u32 bmr_cache;
+ bool registered;
+} tc = {
+ .clksrc = {
+ .rating = 200,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+ .clkevt = {
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ /* Should be lower than at91rm9200's system timer */
+ .rating = 125,
+ },
+};
+
+static struct tc_clkevt_device {
+ struct clock_event_device clkevt;
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *slow_clk;
+ struct clk *clk;
+ char name[20];
+ int channel;
+ int irq;
+ struct {
+ u32 cmr;
+ u32 imr;
+ u32 rc;
+ bool clken;
+ } cache;
+ bool registered;
+} tce = {
+ .clkevt = {
+ .features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT,
+ /*
+ * Should be lower than at91rm9200's system timer
+ * but higher than tc.clkevt.rating
+ */
+ .rating = 140,
+ },
+};
+
+/*
+ * Clockevent device using its own channel
+ */
+static int tc_clkevt2_shutdown(struct clock_event_device *d)
+{
+ writel(0xff, tce.base + ATMEL_TC_IDR(tce.channel));
+ writel(ATMEL_TC_CCR_CLKDIS, tce.base + ATMEL_TC_CCR(tce.channel));
+ if (!clockevent_state_detached(d))
+ clk_disable(tce.clk);
+
+ return 0;
+}
+
+/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
+ * because using one of the divided clocks would usually mean the
+ * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
+ *
+ * A divided clock could be good for high resolution timers, since
+ * 30.5 usec resolution can seem "low".
+ */
+static int tc_clkevt2_set_oneshot(struct clock_event_device *d)
+{
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
+ tc_clkevt2_shutdown(d);
+
+ clk_enable(tce.clk);
+
+ /* slow clock, count up to RC, then irq and stop */
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_CPCSTOP |
+ ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UPRC,
+ tce.base + ATMEL_TC_CMR(tce.channel));
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel));
+
+ return 0;
+}
+
+static int tc_clkevt2_set_periodic(struct clock_event_device *d)
+{
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
+ tc_clkevt2_shutdown(d);
+
+ /* By not making the gentime core emulate periodic mode on top
+ * of oneshot, we get lower overhead and improved accuracy.
+ */
+ clk_enable(tce.clk);
+
+ /* slow clock, count up to RC, then irq and restart */
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_WAVE |
+ ATMEL_TC_CMR_WAVESEL_UPRC,
+ tce.base + ATMEL_TC_CMR(tce.channel));
+ writel((32768 + HZ / 2) / HZ, tce.base + ATMEL_TC_RC(tce.channel));
+
+ /* Enable clock and interrupts on RC compare */
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel));
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tce.base + ATMEL_TC_CCR(tce.channel));
+
+ return 0;
+}
+
+static int tc_clkevt2_next_event(unsigned long delta,
+ struct clock_event_device *d)
+{
+ writel(delta, tce.base + ATMEL_TC_RC(tce.channel));
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tce.base + ATMEL_TC_CCR(tce.channel));
+
+ return 0;
+}
+
+static irqreturn_t tc_clkevt2_irq(int irq, void *handle)
+{
+ unsigned int sr;
+
+ sr = readl(tce.base + ATMEL_TC_SR(tce.channel));
+ if (sr & ATMEL_TC_CPCS) {
+ tce.clkevt.event_handler(&tce.clkevt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static void tc_clkevt2_suspend(struct clock_event_device *d)
+{
+ tce.cache.cmr = readl(tce.base + ATMEL_TC_CMR(tce.channel));
+ tce.cache.imr = readl(tce.base + ATMEL_TC_IMR(tce.channel));
+ tce.cache.rc = readl(tce.base + ATMEL_TC_RC(tce.channel));
+ tce.cache.clken = !!(readl(tce.base + ATMEL_TC_SR(tce.channel)) &
+ ATMEL_TC_CLKSTA);
+}
+
+static void tc_clkevt2_resume(struct clock_event_device *d)
+{
+ /* Restore registers for the channel, RA and RB are not used */
+ writel(tce.cache.cmr, tc.base + ATMEL_TC_CMR(tce.channel));
+ writel(tce.cache.rc, tc.base + ATMEL_TC_RC(tce.channel));
+ writel(0, tc.base + ATMEL_TC_RA(tce.channel));
+ writel(0, tc.base + ATMEL_TC_RB(tce.channel));
+ /* Disable all the interrupts */
+ writel(0xff, tc.base + ATMEL_TC_IDR(tce.channel));
+ /* Reenable interrupts that were enabled before suspending */
+ writel(tce.cache.imr, tc.base + ATMEL_TC_IER(tce.channel));
+
+ /* Start the clock if it was used */
+ if (tce.cache.clken)
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tc.base + ATMEL_TC_CCR(tce.channel));
+}
+
+static int __init tc_clkevt_register(struct device_node *node,
+ struct regmap *regmap, void __iomem *base,
+ int channel, int irq, int bits)
+{
+ int ret;
+
+ tce.regmap = regmap;
+ tce.base = base;
+ tce.channel = channel;
+ tce.irq = irq;
+
+ tce.slow_clk = of_clk_get_by_name(node->parent, "slow_clk");
+ if (IS_ERR(tce.slow_clk))
+ return PTR_ERR(tce.slow_clk);
+
+ ret = clk_prepare_enable(tce.slow_clk);
+ if (ret)
+ return ret;
+
+ tce.clk = tcb_clk_get(node, tce.channel);
+ if (IS_ERR(tce.clk)) {
+ ret = PTR_ERR(tce.clk);
+ goto err_slow;
+ }
+
+ snprintf(tce.name, sizeof(tce.name), "%s:%d",
+ kbasename(node->parent->full_name), channel);
+ tce.clkevt.cpumask = cpumask_of(0);
+ tce.clkevt.name = tce.name;
+ tce.clkevt.set_next_event = tc_clkevt2_next_event,
+ tce.clkevt.set_state_shutdown = tc_clkevt2_shutdown,
+ tce.clkevt.set_state_periodic = tc_clkevt2_set_periodic,
+ tce.clkevt.set_state_oneshot = tc_clkevt2_set_oneshot,
+ tce.clkevt.suspend = tc_clkevt2_suspend,
+ tce.clkevt.resume = tc_clkevt2_resume,
+
+ /* try to enable clk to avoid future errors in mode change */
+ ret = clk_prepare_enable(tce.clk);
+ if (ret)
+ goto err_slow;
+ clk_disable(tce.clk);
+
+ clockevents_config_and_register(&tce.clkevt, 32768, 1, BIT(bits) - 1);
+
+ ret = request_irq(tce.irq, tc_clkevt2_irq, IRQF_TIMER | IRQF_SHARED,
+ tce.clkevt.name, &tce);
+ if (ret)
+ goto err_clk;
+
+ tce.registered = true;
+
+ return 0;
+
+err_clk:
+ clk_unprepare(tce.clk);
+err_slow:
+ clk_disable_unprepare(tce.slow_clk);
+
+ return ret;
+}
+
+/*
+ * Clocksource and clockevent using the same channel(s)
+ */
+static u64 tc_get_cycles(struct clocksource *cs)
+{
+ u32 lower, upper;
+
+ do {
+ upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
+ lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
+ } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
+
+ return (upper << 16) | lower;
+}
+
+static u64 tc_get_cycles32(struct clocksource *cs)
+{
+ return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
+}
+
+static u64 notrace tc_sched_clock_read(void)
+{
+ return tc_get_cycles(&tc.clksrc);
+}
+
+static u64 notrace tc_sched_clock_read32(void)
+{
+ return tc_get_cycles32(&tc.clksrc);
+}
+
+static int tcb_clkevt_next_event(unsigned long delta,
+ struct clock_event_device *d)
+{
+ u32 old, next, cur;
+
+
+ old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
+ next = old + delta;
+ writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
+ cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
+
+ /* check whether the delta elapsed while setting the register */
+ if ((next < old && cur < old && cur > next) ||
+ (next > old && (cur < old || cur > next))) {
+ /*
+ * Clear the CPCS bit in the status register to avoid
+ * generating a spurious interrupt next time a valid
+ * timer event is configured.
+ */
+ old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
+ return -ETIME;
+ }
+
+ writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));
+
+ return 0;
+}
+
+static irqreturn_t tc_clkevt_irq(int irq, void *handle)
+{
+ unsigned int sr;
+
+ sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
+ if (sr & ATMEL_TC_CPCS) {
+ tc.clkevt.event_handler(&tc.clkevt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static int tcb_clkevt_oneshot(struct clock_event_device *dev)
+{
+ if (clockevent_state_oneshot(dev))
+ return 0;
+
+ /*
+ * Because both clockevent devices may share the same IRQ, we don't want
+ * the less likely one to stay requested
+ */
+ return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
+ tc.name, &tc);
+}
+
+static int tcb_clkevt_shutdown(struct clock_event_device *dev)
+{
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
+ if (tc.bits == 16)
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
+
+ if (!clockevent_state_detached(dev))
+ free_irq(tc.irq, &tc);
+
+ return 0;
+}
+
+static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
+ int mck_divisor_idx)
+{
+ /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
+ writel(mck_divisor_idx /* likely divide-by-8 */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP /* free-run */
+ | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */
+ | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
+ writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
+ writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
+
+ /* second channel: waveform mode, input TIOA */
+ writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
+ tc->base + ATMEL_TC_CMR(tc->channels[1]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
+
+ /* chain both channel, we assume the previous channel */
+ regmap_write(tc->regmap, ATMEL_TC_BMR,
+ ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
+ /* then reset all the timers */
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
+ int mck_divisor_idx)
+{
+ /* channel 0: waveform mode, input mclk/8 */
+ writel(mck_divisor_idx /* likely divide-by-8 */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
+
+ /* then reset all the timers */
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static void tc_clksrc_suspend(struct clocksource *cs)
+{
+ int i;
+
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
+ tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
+ tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
+ tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
+ tc.cache[i].clken = !!(readl(tc.base +
+ ATMEL_TC_SR(tc.channels[i])) &
+ ATMEL_TC_CLKSTA);
+ }
+
+ if (tc.bits == 16)
+ regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
+}
+
+static void tc_clksrc_resume(struct clocksource *cs)
+{
+ int i;
+
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
+ /* Restore registers for the channel, RA and RB are not used */
+ writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
+ writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
+ writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
+ writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
+ /* Disable all the interrupts */
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
+ /* Reenable interrupts that were enabled before suspending */
+ writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
+
+ /* Start the clock if it was used */
+ if (tc.cache[i].clken)
+ writel(ATMEL_TC_CCR_CLKEN, tc.base +
+ ATMEL_TC_CCR(tc.channels[i]));
+ }
+
+ /* in case of dual channel, chain channels */
+ if (tc.bits == 16)
+ regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
+ /* Finally, trigger all the channels*/
+ regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static int __init tcb_clksrc_register(struct device_node *node,
+ struct regmap *regmap, void __iomem *base,
+ int channel, int channel1, int irq,
+ int bits)
+{
+ u32 rate, divided_rate = 0;
+ int best_divisor_idx = -1;
+ int i, err = -1;
+ u64 (*tc_sched_clock)(void);
+
+ tc.regmap = regmap;
+ tc.base = base;
+ tc.channels[0] = channel;
+ tc.channels[1] = channel1;
+ tc.irq = irq;
+ tc.bits = bits;
+
+ tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
+ if (IS_ERR(tc.clk[0]))
+ return PTR_ERR(tc.clk[0]);
+ err = clk_prepare_enable(tc.clk[0]);
+ if (err) {
+ pr_debug("can't enable T0 clk\n");
+ goto err_clk;
+ }
+
+ /* How fast will we be counting? Pick something over 5 MHz. */
+ rate = (u32)clk_get_rate(tc.clk[0]);
+ for (i = 0; i < 5; i++) {
+ unsigned int divisor = atmel_tc_divisors[i];
+ unsigned int tmp;
+
+ if (!divisor)
+ continue;
+
+ tmp = rate / divisor;
+ pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
+ if (best_divisor_idx > 0) {
+ if (tmp < 5 * 1000 * 1000)
+ continue;
+ }
+ divided_rate = tmp;
+ best_divisor_idx = i;
+ }
+
+ if (tc.bits == 32) {
+ tc.clksrc.read = tc_get_cycles32;
+ tcb_setup_single_chan(&tc, best_divisor_idx);
+ tc_sched_clock = tc_sched_clock_read32;
+ snprintf(tc.name, sizeof(tc.name), "%s:%d",
+ kbasename(node->parent->full_name), tc.channels[0]);
+ } else {
+ tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
+ if (IS_ERR(tc.clk[1]))
+ goto err_disable_t0;
+
+ err = clk_prepare_enable(tc.clk[1]);
+ if (err) {
+ pr_debug("can't enable T1 clk\n");
+ goto err_clk1;
+ }
+ tc.clksrc.read = tc_get_cycles,
+ tcb_setup_dual_chan(&tc, best_divisor_idx);
+ tc_sched_clock = tc_sched_clock_read;
+ snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
+ kbasename(node->parent->full_name), tc.channels[0],
+ tc.channels[1]);
+ }
+
+ pr_debug("%s at %d.%03d MHz\n", tc.name,
+ divided_rate / 1000000,
+ ((divided_rate + 500000) % 1000000) / 1000);
+
+ tc.clksrc.name = tc.name;
+ tc.clksrc.suspend = tc_clksrc_suspend;
+ tc.clksrc.resume = tc_clksrc_resume;
+
+ err = clocksource_register_hz(&tc.clksrc, divided_rate);
+ if (err)
+ goto err_disable_t1;
+
+ sched_clock_register(tc_sched_clock, 32, divided_rate);
+
+ tc.registered = true;
+
+ /* Set up and register clockevents */
+ tc.clkevt.name = tc.name;
+ tc.clkevt.cpumask = cpumask_of(0);
+ tc.clkevt.set_next_event = tcb_clkevt_next_event;
+ tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
+ tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
+ clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
+ BIT(tc.bits) - 1);
+
+ return 0;
+
+err_disable_t1:
+ if (tc.bits == 16)
+ clk_disable_unprepare(tc.clk[1]);
+
+err_clk1:
+ if (tc.bits == 16)
+ clk_put(tc.clk[1]);
+
+err_disable_t0:
+ clk_disable_unprepare(tc.clk[0]);
+
+err_clk:
+ clk_put(tc.clk[0]);
+
+ pr_err("%s: unable to register clocksource/clockevent\n",
+ tc.clksrc.name);
+
+ return err;
+}
+
+static int __init tcb_clksrc_init(struct device_node *node)
+{
+ const struct of_device_id *match;
+ const struct atmel_tcb_info *tcb_info;
+ struct regmap *regmap;
+ void __iomem *tcb_base;
+ u32 channel;
+ int bits, irq, err, chan1 = -1;
+
+ if (tc.registered && tce.registered)
+ return -ENODEV;
+
+ /*
+ * The regmap has to be used to access registers that are shared
+ * between channels on the same TCB but we keep direct IO access for
+ * the counters to avoid the impact on performance
+ */
+ regmap = syscon_node_to_regmap(node->parent);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ tcb_base = of_iomap(node->parent, 0);
+ if (!tcb_base) {
+ pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);
+ return -ENXIO;
+ }
+
+ match = of_match_node(atmel_tcb_dt_ids, node->parent);
+ tcb_info = match->data;
+ bits = tcb_info->bits;
+
+ err = of_property_read_u32_index(node, "reg", 0, &channel);
+ if (err)
+ return err;
+
+ irq = tcb_irq_get(node, channel);
+ if (irq < 0)
+ return irq;
+
+ if (tc.registered)
+ return tc_clkevt_register(node, regmap, tcb_base, channel, irq,
+ bits);
+
+ if (bits == 16) {
+ of_property_read_u32_index(node, "reg", 1, &chan1);
+ if (chan1 == -1) {
+ if (tce.registered) {
+ pr_err("%s: clocksource needs two channels\n",
+ node->parent->full_name);
+ return -EINVAL;
+ } else {
+ return tc_clkevt_register(node, regmap,
+ tcb_base, channel,
+ irq, bits);
+ }
+ }
+ }
+
+ return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
+ bits);
+}
+CLOCKSOURCE_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer",
+ tcb_clksrc_init);

View File

@ -0,0 +1,37 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Fri, 4 May 2018 17:45:32 +0200
Subject: [PATCH 2/3] drivers/md/raid5: Use irqsave variant of
atomic_dec_and_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The irqsave variant of atomic_dec_and_lock handles irqsave/restore when
taking/releasing the spin lock. With this variant the call of
local_irq_save is no longer required.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/raid5.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -409,16 +409,15 @@ void raid5_release_stripe(struct stripe_
md_wakeup_thread(conf->mddev->thread);
return;
slow_path:
- local_irq_save(flags);
/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
- if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
+ if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) {
INIT_LIST_HEAD(&list);
hash = sh->hash_lock_index;
do_release_stripe(conf, sh, &list);
spin_unlock(&conf->device_lock);
release_inactive_stripe_list(conf, &list, hash);
+ local_irq_restore(flags);
}
- local_irq_restore(flags);
}
static inline void remove_hash(struct stripe_head *sh)

View File

@ -0,0 +1,65 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Jul 2018 13:06:07 +0200
Subject: [PATCH 2/4] mm/list_lru: Move locking from __list_lru_walk_one() to
its caller
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Move the locking inside __list_lru_walk_one() to its caller. This is a
preparation step in order to introduce list_lru_walk_one_irq() which
does spin_lock_irq() instead of spin_lock() for the locking.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/list_lru.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -204,7 +204,6 @@ static unsigned long
struct list_head *item, *n;
unsigned long isolated = 0;
- spin_lock(&nlru->lock);
l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
list_for_each_safe(item, n, &l->list) {
@@ -250,8 +249,6 @@ static unsigned long
BUG();
}
}
-
- spin_unlock(&nlru->lock);
return isolated;
}
@@ -260,8 +257,14 @@ list_lru_walk_one(struct list_lru *lru,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk)
{
- return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
- isolate, cb_arg, nr_to_walk);
+ struct list_lru_node *nlru = &lru->node[nid];
+ unsigned long ret;
+
+ spin_lock(&nlru->lock);
+ ret = __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
+ isolate, cb_arg, nr_to_walk);
+ spin_unlock(&nlru->lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
@@ -276,8 +279,13 @@ unsigned long list_lru_walk_node(struct
nr_to_walk);
if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
for_each_memcg_cache_index(memcg_idx) {
+ struct list_lru_node *nlru = &lru->node[nid];
+
+ spin_lock(&nlru->lock);
isolated += __list_lru_walk_one(lru, nid, memcg_idx,
isolate, cb_arg, nr_to_walk);
+ spin_unlock(&nlru->lock);
+
if (*nr_to_walk <= 0)
break;
}

View File

@ -0,0 +1,45 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 22 Jun 2018 11:43:35 +0200
Subject: [PATCH 2/3] mm: workingset: make shadow_lru_isolate() use locking
suffix
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
shadow_lru_isolate() disables interrupts and acquires a lock. It could
use spin_lock_irq() instead. It also uses local_irq_enable() while it
could use spin_unlock_irq()/xa_unlock_irq().
Use proper suffix for lock/unlock in order to enable/disable interrupts
during release/acquire of a lock.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/workingset.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -431,7 +431,7 @@ static enum lru_status shadow_lru_isolat
/* Coming from the list, invert the lock order */
if (!xa_trylock(&mapping->i_pages)) {
- spin_unlock(lru_lock);
+ spin_unlock_irq(lru_lock);
ret = LRU_RETRY;
goto out;
}
@@ -469,13 +469,11 @@ static enum lru_status shadow_lru_isolat
workingset_lookup_update(mapping));
out_invalid:
- xa_unlock(&mapping->i_pages);
+ xa_unlock_irq(&mapping->i_pages);
ret = LRU_REMOVED_RETRY;
out:
- local_irq_enable();
cond_resched();
- local_irq_disable();
- spin_lock(lru_lock);
+ spin_lock_irq(lru_lock);
return ret;
}

View File

@ -0,0 +1,83 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 7 May 2018 17:09:42 +0200
Subject: [PATCH] userns: use refcount_t for reference counting instead
atomic_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
refcount_t type and corresponding API should be used instead of atomic_t when
the variable is used as a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free situations.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/sched/user.h | 5 +++--
kernel/user.c | 8 ++++----
2 files changed, 7 insertions(+), 6 deletions(-)
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/ratelimit.h>
struct key;
@@ -12,7 +13,7 @@ struct key;
* Some day this will be a full-fledged user tracking system..
*/
struct user_struct {
- atomic_t __count; /* reference count */
+ refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
extern struct user_struct * alloc_uid(kuid_t);
static inline struct user_struct *get_uid(struct user_struct *u)
{
- atomic_inc(&u->__count);
+ refcount_inc(&u->__count);
return u;
}
extern void free_uid(struct user_struct *);
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
/* root_user.__count is 1, for init task cred */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = REFCOUNT_INIT(1),
.processes = ATOMIC_INIT(1),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find
hlist_for_each_entry(user, hashent, uidhash_node) {
if (uid_eq(user->uid, uid)) {
- atomic_inc(&user->__count);
+ refcount_inc(&user->__count);
return user;
}
}
@@ -170,7 +170,7 @@ void free_uid(struct user_struct *up)
return;
local_irq_save(flags);
- if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock(&up->__count, &uidhash_lock))
free_user(up, flags);
else
local_irq_restore(flags);
@@ -191,7 +191,7 @@ struct user_struct *alloc_uid(kuid_t uid
goto out_unlock;
new->uid = uid;
- atomic_set(&new->__count, 1);
+ refcount_set(&new->__count, 1);
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);

View File

@ -0,0 +1,30 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:40 +0200
Subject: [PATCH 3/6] clocksource/drivers: atmel-pit: make option silent
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
To conform with the other option, make the ATMEL_PIT option silent so it
can be selected from the platform
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -392,8 +392,11 @@ config ARMV7M_SYSTICK
This options enables support for the ARMv7M system timer unit
config ATMEL_PIT
+ bool "Microchip ARM Periodic Interval Timer (PIT)" if COMPILE_TEST
select TIMER_OF if OF
- def_bool SOC_AT91SAM9 || SOC_SAMA5
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated PIT in Microchip ARM SoCs.
config ATMEL_ST
bool "Atmel ST timer support" if COMPILE_TEST

View File

@ -0,0 +1,31 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Fri, 4 May 2018 17:45:33 +0200
Subject: [PATCH 3/3] drivers/md/raid5: Do not disable irq on
release_inactive_stripe_list() call
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
There is no need to invoke release_inactive_stripe_list() with interrupts
disabled. All call sites, except raid5_release_stripe(), unlock
->device_lock and enable interrupts before invoking the function.
Make it consistent.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/raid5.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -414,9 +414,8 @@ void raid5_release_stripe(struct stripe_
INIT_LIST_HEAD(&list);
hash = sh->hash_lock_index;
do_release_stripe(conf, sh, &list);
- spin_unlock(&conf->device_lock);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
release_inactive_stripe_list(conf, &list, hash);
- local_irq_restore(flags);
}
}

View File

@ -0,0 +1,609 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 May 2018 15:24:22 +0200
Subject: [PATCH 3/4] mm/SLxB: change list_lock to raw_spinlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The list_lock is used with used with IRQs off on RT. Make it a raw_spinlock_t
otherwise the interrupts won't be disabled on -RT. The locking rules remain
the same on !RT.
This patch changes it for SLAB and SLUB since both share the same header
file for struct kmem_cache_node defintion.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/slab.c | 94 +++++++++++++++++++++++++++++++-------------------------------
mm/slab.h | 2 -
mm/slub.c | 50 ++++++++++++++++----------------
3 files changed, 73 insertions(+), 73 deletions(-)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,7 +233,7 @@ static void kmem_cache_node_init(struct
parent->shared = NULL;
parent->alien = NULL;
parent->colour_next = 0;
- spin_lock_init(&parent->list_lock);
+ raw_spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
}
@@ -600,9 +600,9 @@ static noinline void cache_free_pfmemall
page_node = page_to_nid(page);
n = get_node(cachep, page_node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, page_node, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -728,7 +728,7 @@ static void __drain_alien_cache(struct k
struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
/*
* Stuff objects into the remote nodes shared array first.
* That way we could avoid the overhead of putting the objects
@@ -739,7 +739,7 @@ static void __drain_alien_cache(struct k
free_block(cachep, ac->entry, ac->avail, node, list);
ac->avail = 0;
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
}
}
@@ -812,9 +812,9 @@ static int __cache_free_alien(struct kme
slabs_destroy(cachep, &list);
} else {
n = get_node(cachep, page_node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, page_node, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
return 1;
@@ -855,10 +855,10 @@ static int init_cache_node(struct kmem_c
*/
n = get_node(cachep, node);
if (n) {
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
cachep->num;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
return 0;
}
@@ -937,7 +937,7 @@ static int setup_kmem_cache_node(struct
goto fail;
n = get_node(cachep, node);
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
if (n->shared && force_change) {
free_block(cachep, n->shared->entry,
n->shared->avail, node, &list);
@@ -955,7 +955,7 @@ static int setup_kmem_cache_node(struct
new_alien = NULL;
}
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
/*
@@ -994,7 +994,7 @@ static void cpuup_canceled(long cpu)
if (!n)
continue;
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
@@ -1007,7 +1007,7 @@ static void cpuup_canceled(long cpu)
}
if (!cpumask_empty(mask)) {
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
goto free_slab;
}
@@ -1021,7 +1021,7 @@ static void cpuup_canceled(long cpu)
alien = n->alien;
n->alien = NULL;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
kfree(shared);
if (alien) {
@@ -1205,7 +1205,7 @@ static void __init init_list(struct kmem
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
- spin_lock_init(&ptr->list_lock);
+ raw_spin_lock_init(&ptr->list_lock);
MAKE_ALL_LISTS(cachep, ptr, nodeid);
cachep->node[nodeid] = ptr;
@@ -1376,11 +1376,11 @@ slab_out_of_memory(struct kmem_cache *ca
for_each_kmem_cache_node(cachep, node, n) {
unsigned long total_slabs, free_slabs, free_objs;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
total_slabs = n->total_slabs;
free_slabs = n->free_slabs;
free_objs = n->free_objects;
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
node, total_slabs - free_slabs, total_slabs,
@@ -2173,7 +2173,7 @@ static void check_spinlock_acquired(stru
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
+ assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
@@ -2181,7 +2181,7 @@ static void check_spinlock_acquired_node
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&get_node(cachep, node)->list_lock);
+ assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
@@ -2221,9 +2221,9 @@ static void do_drain(void *arg)
check_irq_off();
ac = cpu_cache_get(cachep);
n = get_node(cachep, node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
ac->avail = 0;
}
@@ -2241,9 +2241,9 @@ static void drain_cpu_caches(struct kmem
drain_alien_cache(cachep, n->alien);
for_each_kmem_cache_node(cachep, node, n) {
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
drain_array_locked(cachep, n->shared, node, true, &list);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -2265,10 +2265,10 @@ static int drain_freelist(struct kmem_ca
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
p = n->slabs_free.prev;
if (p == &n->slabs_free) {
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
goto out;
}
@@ -2281,7 +2281,7 @@ static int drain_freelist(struct kmem_ca
* to the cache.
*/
n->free_objects -= cache->num;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slab_destroy(cache, page);
nr_freed++;
}
@@ -2729,7 +2729,7 @@ static void cache_grow_end(struct kmem_c
INIT_LIST_HEAD(&page->lru);
n = get_node(cachep, page_to_nid(page));
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
n->total_slabs++;
if (!page->active) {
list_add_tail(&page->lru, &(n->slabs_free));
@@ -2739,7 +2739,7 @@ static void cache_grow_end(struct kmem_c
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
}
@@ -2907,7 +2907,7 @@ static struct page *get_first_slab(struc
{
struct page *page;
- assert_spin_locked(&n->list_lock);
+ assert_raw_spin_locked(&n->list_lock);
page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
if (!page) {
n->free_touched = 1;
@@ -2933,10 +2933,10 @@ static noinline void *cache_alloc_pfmema
if (!gfp_pfmemalloc_allowed(flags))
return NULL;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
page = get_first_slab(n, true);
if (!page) {
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return NULL;
}
@@ -2945,7 +2945,7 @@ static noinline void *cache_alloc_pfmema
fixup_slab_list(cachep, n, page, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
return obj;
@@ -3004,7 +3004,7 @@ static void *cache_alloc_refill(struct k
if (!n->free_objects && (!shared || !shared->avail))
goto direct_grow;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
shared = READ_ONCE(n->shared);
/* See if we can refill from the shared array */
@@ -3028,7 +3028,7 @@ static void *cache_alloc_refill(struct k
must_grow:
n->free_objects -= ac->avail;
alloc_done:
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
direct_grow:
@@ -3253,7 +3253,7 @@ static void *____cache_alloc_node(struct
BUG_ON(!n);
check_irq_off();
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
page = get_first_slab(n, false);
if (!page)
goto must_grow;
@@ -3271,12 +3271,12 @@ static void *____cache_alloc_node(struct
fixup_slab_list(cachep, n, page, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
return obj;
must_grow:
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (page) {
/* This slab isn't counted yet so don't update free_objects */
@@ -3452,7 +3452,7 @@ static void cache_flusharray(struct kmem
check_irq_off();
n = get_node(cachep, node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
int max = shared_array->limit - shared_array->avail;
@@ -3481,7 +3481,7 @@ static void cache_flusharray(struct kmem
STATS_SET_FREEABLE(cachep, i);
}
#endif
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
@@ -3887,9 +3887,9 @@ static int __do_tune_cpucache(struct kme
node = cpu_to_mem(cpu);
n = get_node(cachep, node);
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node, &list);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
free_percpu(prev);
@@ -4014,9 +4014,9 @@ static void drain_array(struct kmem_cach
return;
}
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
drain_array_locked(cachep, ac, node, false, &list);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -4100,7 +4100,7 @@ void get_slabinfo(struct kmem_cache *cac
for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
total_slabs += n->total_slabs;
free_slabs += n->free_slabs;
@@ -4109,7 +4109,7 @@ void get_slabinfo(struct kmem_cache *cac
if (n->shared)
shared_avail += n->shared->avail;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
}
num_objs = total_slabs * cachep->num;
active_slabs = total_slabs - free_slabs;
@@ -4324,13 +4324,13 @@ static int leaks_show(struct seq_file *m
for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
list_for_each_entry(page, &n->slabs_full, lru)
handle_slab(x, cachep, page);
list_for_each_entry(page, &n->slabs_partial, lru)
handle_slab(x, cachep, page);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
}
} while (!is_store_user_clean(cachep));
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -453,7 +453,7 @@ static inline void slab_post_alloc_hook(
* The slab lists for all objects.
*/
struct kmem_cache_node {
- spinlock_t list_lock;
+ raw_spinlock_t list_lock;
#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1169,7 +1169,7 @@ static noinline int free_debug_processin
unsigned long uninitialized_var(flags);
int ret = 0;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
slab_lock(page);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@@ -1204,7 +1204,7 @@ static noinline int free_debug_processin
bulk_cnt, cnt);
slab_unlock(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
if (!ret)
slab_fix(s, "Object at 0x%p not freed", object);
return ret;
@@ -1804,7 +1804,7 @@ static void *get_partial_node(struct kme
if (!n || !n->nr_partial)
return NULL;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;
@@ -1829,7 +1829,7 @@ static void *get_partial_node(struct kme
break;
}
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return object;
}
@@ -2075,7 +2075,7 @@ static void deactivate_slab(struct kmem_
* that acquire_slab() will see a slab page that
* is frozen
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
} else {
m = M_FULL;
@@ -2086,7 +2086,7 @@ static void deactivate_slab(struct kmem_
* slabs from diagnostic functions will not see
* any frozen slabs.
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
}
@@ -2121,7 +2121,7 @@ static void deactivate_slab(struct kmem_
goto redo;
if (lock)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
@@ -2156,10 +2156,10 @@ static void unfreeze_partials(struct kme
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
n = n2;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
do {
@@ -2188,7 +2188,7 @@ static void unfreeze_partials(struct kme
}
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
while (discard_page) {
page = discard_page;
@@ -2357,10 +2357,10 @@ static unsigned long count_partial(struc
unsigned long x = 0;
struct page *page;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
@@ -2795,7 +2795,7 @@ static void __slab_free(struct kmem_cach
do {
if (unlikely(n)) {
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
@@ -2827,7 +2827,7 @@ static void __slab_free(struct kmem_cach
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
}
}
@@ -2869,7 +2869,7 @@ static void __slab_free(struct kmem_cach
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return;
slab_empty:
@@ -2884,7 +2884,7 @@ static void __slab_free(struct kmem_cach
remove_full(s, n, page);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}
@@ -3271,7 +3271,7 @@ static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- spin_lock_init(&n->list_lock);
+ raw_spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
@@ -3655,7 +3655,7 @@ static void free_partial(struct kmem_cac
struct page *page, *h;
BUG_ON(irqs_disabled());
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &n->partial, lru) {
if (!page->inuse) {
remove_partial(n, page);
@@ -3665,7 +3665,7 @@ static void free_partial(struct kmem_cac
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &discard, lru)
discard_slab(s, page);
@@ -3938,7 +3938,7 @@ int __kmem_cache_shrink(struct kmem_cach
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
INIT_LIST_HEAD(promote + i);
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
/*
* Build lists of slabs to discard or promote.
@@ -3969,7 +3969,7 @@ int __kmem_cache_shrink(struct kmem_cach
for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
list_splice(promote + i, &n->partial);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
list_for_each_entry_safe(page, t, &discard, lru)
@@ -4383,7 +4383,7 @@ static int validate_slab_node(struct kme
struct page *page;
unsigned long flags;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru) {
validate_slab_slab(s, page, map);
@@ -4405,7 +4405,7 @@ static int validate_slab_node(struct kme
s->name, count, atomic_long_read(&n->nr_slabs));
out:
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return count;
}
@@ -4595,12 +4595,12 @@ static int list_locations(struct kmem_ca
if (!atomic_long_read(&n->nr_slabs))
continue;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc, map);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
}
for (i = 0; i < t.count; i++) {

View File

@ -0,0 +1,56 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Jul 2018 13:08:56 +0200
Subject: [PATCH 3/4] mm/list_lru: Pass struct list_lru_node as an argument
__list_lru_walk_one()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
__list_lru_walk_one() is invoked with struct list_lru *lru, int nid as
the first two argument. Those two are only used to retrieve struct
list_lru_node. Since this is already done by the caller of the function
for the locking, we can pass struct list_lru_node directly and avoid the
dance around it.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/list_lru.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -194,12 +194,11 @@ unsigned long list_lru_count_node(struct
EXPORT_SYMBOL_GPL(list_lru_count_node);
static unsigned long
-__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
+__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk)
{
- struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
struct list_head *item, *n;
unsigned long isolated = 0;
@@ -261,8 +260,8 @@ list_lru_walk_one(struct list_lru *lru,
unsigned long ret;
spin_lock(&nlru->lock);
- ret = __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
- isolate, cb_arg, nr_to_walk);
+ ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
+ nr_to_walk);
spin_unlock(&nlru->lock);
return ret;
}
@@ -282,8 +281,9 @@ unsigned long list_lru_walk_node(struct
struct list_lru_node *nlru = &lru->node[nid];
spin_lock(&nlru->lock);
- isolated += __list_lru_walk_one(lru, nid, memcg_idx,
- isolate, cb_arg, nr_to_walk);
+ isolated += __list_lru_walk_one(nlru, memcg_idx,
+ isolate, cb_arg,
+ nr_to_walk);
spin_unlock(&nlru->lock);
if (*nr_to_walk <= 0)

View File

@ -0,0 +1,49 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:41 +0200
Subject: [PATCH 4/6] ARM: at91: Implement clocksource selection
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Allow selecting and unselecting the PIT clocksource driver so it doesn't
have to be compile when unused.
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mach-at91/Kconfig | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -106,6 +106,31 @@ config SOC_AT91SAM9
AT91SAM9X35
AT91SAM9XE
+comment "Clocksource driver selection"
+
+config ATMEL_CLOCKSOURCE_PIT
+ bool "Periodic Interval Timer (PIT) support"
+ depends on SOC_AT91SAM9 || SOC_SAMA5
+ default SOC_AT91SAM9 || SOC_SAMA5
+ select ATMEL_PIT
+ help
+ Select this to get a clocksource based on the Atmel Periodic Interval
+ Timer. It has a relatively low resolution and the TC Block clocksource
+ should be preferred.
+
+config ATMEL_CLOCKSOURCE_TCB
+ bool "Timer Counter Blocks (TCB) support"
+ depends on SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 || COMPILE_TEST
+ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5
+ depends on !ATMEL_TCLIB
+ select ATMEL_ARM_TCB_CLKSRC
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate.
+ On platforms with 16-bit counters, two timer channels are combined
+ to make a single 32-bit timer.
+ It can also be used as a clock event device supporting oneshot mode.
+
config HAVE_AT91_UTMI
bool

View File

@ -0,0 +1,217 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Jun 2018 17:29:19 +0200
Subject: [PATCH 4/4] mm/SLUB: delay giving back empty slubs to IRQ enabled
regions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
__free_slab() is invoked with disabled interrupts which increases the
irq-off time while __free_pages() is doing the work.
Allow __free_slab() to be invoked with enabled interrupts and move
everything from interrupts-off invocations to a temporary per-CPU list
so it can be processed later.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/slub.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 69 insertions(+), 5 deletions(-)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1332,6 +1332,12 @@ static inline void dec_slabs_node(struct
#endif /* CONFIG_SLUB_DEBUG */
+struct slub_free_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
+
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
@@ -1686,6 +1692,16 @@ static void __free_slab(struct kmem_cach
__free_pages(page, order);
}
+static void free_delayed(struct list_head *h)
+{
+ while (!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_slab(page->slab_cache, page);
+ }
+}
+
static void rcu_free_slab(struct rcu_head *h)
{
struct page *page = container_of(h, struct page, rcu_head);
@@ -1697,6 +1713,12 @@ static void free_slab(struct kmem_cache
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
call_rcu(&page->rcu_head, rcu_free_slab);
+ } else if (irqs_disabled()) {
+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
+
+ raw_spin_lock(&f->lock);
+ list_add(&page->lru, &f->list);
+ raw_spin_unlock(&f->lock);
} else
__free_slab(s, page);
}
@@ -2225,14 +2247,21 @@ static void put_cpu_partial(struct kmem_
pobjects = oldpage->pobjects;
pages = oldpage->pages;
if (drain && pobjects > s->cpu_partial) {
+ struct slub_free_list *f;
unsigned long flags;
+ LIST_HEAD(tofree);
/*
* partial array is full. Move the existing
* set to the per node partial list.
*/
local_irq_save(flags);
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ f = this_cpu_ptr(&slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2302,7 +2331,22 @@ static bool has_cpu_slab(int cpu, void *
static void flush_all(struct kmem_cache *s)
{
+ LIST_HEAD(tofree);
+ int cpu;
+
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ for_each_online_cpu(cpu) {
+ struct slub_free_list *f;
+
+ if (!has_cpu_slab(cpu, s))
+ continue;
+
+ f = &per_cpu(slub_free_list, cpu);
+ raw_spin_lock_irq(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock_irq(&f->lock);
+ free_delayed(&tofree);
+ }
}
/*
@@ -2500,8 +2544,10 @@ static inline void *get_freelist(struct
* already disabled (which is the case for bulk allocation).
*/
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
- unsigned long addr, struct kmem_cache_cpu *c)
+ unsigned long addr, struct kmem_cache_cpu *c,
+ struct list_head *to_free)
{
+ struct slub_free_list *f;
void *freelist;
struct page *page;
@@ -2557,6 +2603,13 @@ static void *___slab_alloc(struct kmem_c
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
+
+out:
+ f = this_cpu_ptr(&slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, to_free);
+ raw_spin_unlock(&f->lock);
+
return freelist;
new_slab:
@@ -2572,7 +2625,7 @@ static void *___slab_alloc(struct kmem_c
if (unlikely(!freelist)) {
slab_out_of_memory(s, gfpflags, node);
- return NULL;
+ goto out;
}
page = c->page;
@@ -2585,7 +2638,7 @@ static void *___slab_alloc(struct kmem_c
goto new_slab; /* Slab failed checks. Next slab needed */
deactivate_slab(s, page, get_freepointer(s, freelist), c);
- return freelist;
+ goto out;
}
/*
@@ -2597,6 +2650,7 @@ static void *__slab_alloc(struct kmem_ca
{
void *p;
unsigned long flags;
+ LIST_HEAD(tofree);
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
@@ -2608,8 +2662,9 @@ static void *__slab_alloc(struct kmem_ca
c = this_cpu_ptr(s->cpu_slab);
#endif
- p = ___slab_alloc(s, gfpflags, node, addr, c);
+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
local_irq_restore(flags);
+ free_delayed(&tofree);
return p;
}
@@ -3087,6 +3142,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
void **p)
{
struct kmem_cache_cpu *c;
+ LIST_HEAD(to_free);
int i;
/* memcg and kmem_cache debug support */
@@ -3110,7 +3166,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
* of re-populating per CPU c->freelist
*/
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
- _RET_IP_, c);
+ _RET_IP_, c, &to_free);
if (unlikely(!p[i]))
goto error;
@@ -3122,6 +3178,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
}
c->tid = next_tid(c->tid);
local_irq_enable();
+ free_delayed(&to_free);
/* Clear memory outside IRQ disabled fastpath loop */
if (unlikely(flags & __GFP_ZERO)) {
@@ -3136,6 +3193,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
return i;
error:
local_irq_enable();
+ free_delayed(&to_free);
slab_post_alloc_hook(s, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
@@ -4182,6 +4240,12 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+ }
if (debug_guardpage_minorder())
slub_max_order = 0;

View File

@ -0,0 +1,107 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Jul 2018 13:17:27 +0200
Subject: [PATCH 4/4] mm/list_lru: Introduce list_lru_shrink_walk_irq()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Provide list_lru_shrink_walk_irq() and let it behave like
list_lru_walk_one() except that it locks the spinlock with
spin_lock_irq(). This is used by scan_shadow_nodes() because its lock
nests within the i_pages lock which is acquired with IRQ.
This change allows to use proper locking promitives instead hand crafted
lock_irq_disable() plus spin_lock().
There is no EXPORT_SYMBOL provided because the current user is in-KERNEL
only.
Add list_lru_shrink_walk_irq() which acquires the spinlock with the
proper locking primitives.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/list_lru.h | 25 +++++++++++++++++++++++++
mm/list_lru.c | 15 +++++++++++++++
mm/workingset.c | 8 ++------
3 files changed, 42 insertions(+), 6 deletions(-)
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -162,6 +162,23 @@ unsigned long list_lru_walk_one(struct l
int nid, struct mem_cgroup *memcg,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk);
+/**
+ * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items.
+ * @lru: the lru pointer.
+ * @nid: the node id to scan from.
+ * @memcg: the cgroup to scan from.
+ * @isolate: callback function that is resposible for deciding what to do with
+ * the item currently being scanned
+ * @cb_arg: opaque type that will be passed to @isolate
+ * @nr_to_walk: how many items to scan.
+ *
+ * Same as @list_lru_walk_one except that the spinlock is acquired with
+ * spin_lock_irq().
+ */
+unsigned long list_lru_walk_one_irq(struct list_lru *lru,
+ int nid, struct mem_cgroup *memcg,
+ list_lru_walk_cb isolate, void *cb_arg,
+ unsigned long *nr_to_walk);
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk);
@@ -175,6 +192,14 @@ list_lru_shrink_walk(struct list_lru *lr
}
static inline unsigned long
+list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc,
+ list_lru_walk_cb isolate, void *cb_arg)
+{
+ return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate, cb_arg,
+ &sc->nr_to_scan);
+}
+
+static inline unsigned long
list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
void *cb_arg, unsigned long nr_to_walk)
{
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -267,6 +267,21 @@ list_lru_walk_one(struct list_lru *lru,
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
+unsigned long
+list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+ list_lru_walk_cb isolate, void *cb_arg,
+ unsigned long *nr_to_walk)
+{
+ struct list_lru_node *nlru = &lru->node[nid];
+ unsigned long ret;
+
+ spin_lock_irq(&nlru->lock);
+ ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
+ nr_to_walk);
+ spin_unlock_irq(&nlru->lock);
+ return ret;
+}
+
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -480,13 +480,9 @@ static enum lru_status shadow_lru_isolat
static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
struct shrink_control *sc)
{
- unsigned long ret;
-
/* list_lru lock nests inside the IRQ-safe i_pages lock */
- local_irq_disable();
- ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
- local_irq_enable();
- return ret;
+ return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate,
+ NULL);
}
static struct shrinker workingset_shadow_shrinker = {

View File

@ -0,0 +1,35 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:42 +0200
Subject: [PATCH 5/6] ARM: configs: at91: use new TCB timer driver
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Unselecting ATMEL_TCLIB switches the TCB timer driver from tcb_clksrc to
timer-atmel-tcb.
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 -
arch/arm/configs/sama5_defconfig | 1 -
2 files changed, 2 deletions(-)
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -64,7 +64,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -75,7 +75,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_EEPROM_AT24=y
CONFIG_SCSI=y

View File

@ -0,0 +1,31 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 4 Apr 2018 11:43:56 +0200
Subject: [PATCH] bdi: Use irqsave variant of refcount_dec_and_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The irqsave variant of refcount_dec_and_lock handles irqsave/restore when
taking/releasing the spin lock. With this variant the call of
local_irq_save/restore is no longer required.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/backing-dev.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -473,11 +473,8 @@ void wb_congested_put(struct bdi_writeba
{
unsigned long flags;
- local_irq_save(flags);
- if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
- local_irq_restore(flags);
+ if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
return;
- }
/* bdi might already have been destroyed leaving @congested unlinked */
if (congested->__bdi) {

View File

@ -0,0 +1,36 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:43 +0200
Subject: [PATCH 6/6] ARM: configs: at91: unselect PIT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The PIT is not required anymore to successfully boot and may actually harm
in case preempt-rt is used because the PIT interrupt is shared.
Disable it so the TCB clocksource is used.
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 +
arch/arm/configs/sama5_defconfig | 1 +
2 files changed, 2 insertions(+)
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -19,6 +19,7 @@ CONFIG_ARCH_MULTI_V5=y
CONFIG_ARCH_AT91=y
CONFIG_SOC_AT91RM9200=y
CONFIG_SOC_AT91SAM9=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y
CONFIG_SOC_SAMA5D2=y
CONFIG_SOC_SAMA5D3=y
CONFIG_SOC_SAMA5D4=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0

View File

@ -0,0 +1,31 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 4 Apr 2018 11:43:57 +0200
Subject: [PATCH] userns: Use irqsave variant of refcount_dec_and_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The irqsave variant of refcount_dec_and_lock handles irqsave/restore when
taking/releasing the spin lock. With this variant the call of
local_irq_save/restore is no longer required.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/user.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -169,11 +169,8 @@ void free_uid(struct user_struct *up)
if (!up)
return;
- local_irq_save(flags);
- if (refcount_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
free_user(up, flags);
- else
- local_irq_restore(flags);
}
struct user_struct *alloc_uid(kuid_t uid)

View File

@ -0,0 +1,86 @@
From: "Yadi.hu" <yadi.hu@windriver.com>
Date: Wed, 10 Dec 2014 10:32:09 +0800
Subject: ARM: enable irq in translation/section permission fault handlers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Probably happens on all ARM, with
CONFIG_PREEMPT_RT_FULL
CONFIG_DEBUG_ATOMIC_SLEEP
This simple program....
int main() {
*((char*)0xc0001000) = 0;
};
[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a
[ 512.743217] INFO: lockdep is turned off.
[ 512.743360] irq event stamp: 0
[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null)
[ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744303] softirqs last disabled at (0): [< (null)>] (null)
[ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104)
[ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24)
[ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0)
[ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c)
[ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0)
[ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c)
[ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8)
[ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94)
[ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48)
[ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0)
[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8)
Oxc0000000 belongs to kernel address space, user task can not be
allowed to access it. For above condition, correct result is that
test case should receive a “segment fault” and exits but not stacks.
the root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in
prefetch/data abort handlers"),it deletes irq enable block in Data
abort assemble code and move them into page/breakpiont/alignment fault
handlers instead. But author does not enable irq in translation/section
permission fault handlers. ARM disables irq when it enters exception/
interrupt mode, if kernel doesn't enable irq, it would be still disabled
during translation/section permission fault.
We see the above splat because do_force_sig_info is still called with
IRQs off, and that code eventually does a:
spin_lock_irqsave(&t->sighand->siglock, flags);
As this is architecture independent code, and we've not seen any other
need for other arch to have the siglock converted to raw lock, we can
conclude that we should enable irq for ARM translation/section
permission exception.
Signed-off-by: Yadi.hu <yadi.hu@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mm/fault.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -438,6 +438,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -505,6 +508,9 @@ do_translation_fault(unsigned long addr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}

View File

@ -0,0 +1,77 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 21 Mar 2013 19:01:05 +0100
Subject: printk: Drop the logbuf_lock more often
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The lock is hold with irgs off. The latency drops 500us+ on my arm bugs
with a "full" buffer after executing "dmesg" on the shell.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1411,6 +1411,8 @@ static int syslog_print_all(char __user
{
char *text;
int len = 0;
+ int attempts = 0;
+ int num_msg;
text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
if (!text)
@@ -1422,6 +1424,14 @@ static int syslog_print_all(char __user
u64 seq;
u32 idx;
+try_again:
+ attempts++;
+ if (attempts > 10) {
+ len = -EBUSY;
+ goto out;
+ }
+ num_msg = 0;
+
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
@@ -1434,6 +1444,14 @@ static int syslog_print_all(char __user
len += msg_print_text(msg, true, NULL, 0);
idx = log_next(idx);
seq++;
+ num_msg++;
+ if (num_msg > 5) {
+ num_msg = 0;
+ logbuf_unlock_irq();
+ logbuf_lock_irq();
+ if (clear_seq < log_first_seq)
+ goto try_again;
+ }
}
/* move first record forward until length fits into the buffer */
@@ -1445,6 +1463,14 @@ static int syslog_print_all(char __user
len -= msg_print_text(msg, true, NULL, 0);
idx = log_next(idx);
seq++;
+ num_msg++;
+ if (num_msg > 5) {
+ num_msg = 0;
+ logbuf_unlock_irq();
+ logbuf_lock_irq();
+ if (clear_seq < log_first_seq)
+ goto try_again;
+ }
}
/* last message fitting into this dump */
@@ -1483,6 +1509,7 @@ static int syslog_print_all(char __user
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}
+out:
logbuf_unlock_irq();
kfree(text);

View File

@ -0,0 +1,52 @@
From: Josh Cartwright <joshc@ni.com>
Date: Thu, 11 Feb 2016 11:54:01 -0600
Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating
the vgic and timer states to prevent the calling task from migrating to
another CPU. It does so to prevent the task from writing to the
incorrect per-CPU GIC distributor registers.
On -rt kernels, it's possible to maintain the same guarantee with the
use of migrate_{disable,enable}(), with the added benefit that the
migrate-disabled region is preemptible. Update
kvm_arch_vcpu_ioctl_run() to do so.
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Reported-by: Manish Jaggi <Manish.Jaggi@caviumnetworks.com>
Signed-off-by: Josh Cartwright <joshc@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
virt/kvm/arm/arm.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -694,7 +694,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
- preempt_disable();
+ migrate_disable();
kvm_pmu_flush_hwstate(vcpu);
@@ -743,7 +743,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
- preempt_enable();
+ migrate_enable();
continue;
}
@@ -821,7 +821,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, run, ret);
- preempt_enable();
+ migrate_enable();
ret = handle_exit(vcpu, run, ret);
}

View File

@ -0,0 +1,128 @@
Date: Fri, 28 Oct 2016 23:05:11 +0200
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>,
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
tglx@linutronix.de
Subject: NFSv4: replace seqcount_t with a seqlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me
because it maps to preempt_disable() in -RT which I can't have at this
point. So I took a look at the code.
It the lockdep part was removed in commit abbec2da13f0 ("NFS: Use
raw_write_seqcount_begin/end int nfs4_reclaim_open_state") because
lockdep complained. The whole seqcount thing was introduced in commit
c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as
being recovered").
The recovery threads runs only once.
write_seqlock() does not work on !RT because it disables preemption and it the
writer side is preemptible (has to remain so despite the fact that it will
block readers).
Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/nfs/delegation.c | 4 ++--
fs/nfs/nfs4_fs.h | 2 +-
fs/nfs/nfs4proc.c | 4 ++--
fs/nfs/nfs4state.c | 22 ++++++++++++++++------
4 files changed, 21 insertions(+), 11 deletions(-)
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -152,11 +152,11 @@ static int nfs_delegation_claim_opens(st
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+ seq = read_seqbegin(&sp->so_reclaim_seqlock);
err = nfs4_open_delegation_recall(ctx, state, stateid, type);
if (!err)
err = nfs_delegation_claim_locks(ctx, state, stateid);
- if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+ if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -112,7 +112,7 @@ struct nfs4_state_owner {
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
- seqcount_t so_reclaim_seqcount;
+ seqlock_t so_reclaim_seqlock;
struct mutex so_delegreturn_mutex;
};
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2811,7 +2811,7 @@ static int _nfs4_open_and_get_state(stru
unsigned int seq;
int ret;
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
ret = _nfs4_proc_open(opendata, ctx);
if (ret != 0)
@@ -2849,7 +2849,7 @@ static int _nfs4_open_and_get_state(stru
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+ if (read_seqretry(&sp->so_reclaim_seqlock, seq))
nfs4_schedule_stateid_recovery(server, state);
else
pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -511,7 +511,7 @@ nfs4_alloc_state_owner(struct nfs_server
nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
- seqcount_init(&sp->so_reclaim_seqcount);
+ seqlock_init(&sp->so_reclaim_seqlock);
mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
@@ -1560,8 +1560,12 @@ static int nfs4_reclaim_open_state(struc
* recovering after a network partition or a reboot from a
* server that doesn't support a grace period.
*/
+#ifdef CONFIG_PREEMPT_RT_FULL
+ write_seqlock(&sp->so_reclaim_seqlock);
+#else
+ write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
+#endif
spin_lock(&sp->so_lock);
- raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1630,14 +1634,20 @@ static int nfs4_reclaim_open_state(struc
spin_lock(&sp->so_lock);
goto restart;
}
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ write_sequnlock(&sp->so_reclaim_seqlock);
+#else
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
+#endif
return 0;
out_err:
nfs4_put_open_state(state);
- spin_lock(&sp->so_lock);
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
- spin_unlock(&sp->so_lock);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ write_sequnlock(&sp->so_reclaim_seqlock);
+#else
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
+#endif
return status;
}

View File

@ -0,0 +1,49 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Apr 2018 09:16:22 +0200
Subject: [PATCH] [SCSI] libsas: remove irq save in sas_ata_qc_issue()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
[ upstream commit 2da11d4262639dc0e2fabc6a70886db57af25c43 ]
Since commit 312d3e56119a ("[SCSI] libsas: remove ata_port.lock
management duties from lldds") the sas_ata_qc_issue() function unlocks
the ata_port.lock and disables interrupts before doing so.
That lock is always taken with disabled interrupts so at this point, the
interrupts are already disabled. There is no need to disable the
interrupts before the unlock operation because they are already
disabled.
Restoring the interrupt state later does not change anything because
they were disabled and remain disabled. Therefore remove the operations
which do not change the behaviour.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/scsi/libsas/sas_ata.c | 3 ---
1 file changed, 3 deletions(-)
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -176,7 +176,6 @@ static void sas_ata_task_done(struct sas
static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
{
- unsigned long flags;
struct sas_task *task;
struct scatterlist *sg;
int ret = AC_ERR_SYSTEM;
@@ -190,7 +189,6 @@ static unsigned int sas_ata_qc_issue(str
/* TODO: audit callers to ensure they are ready for qc_issue to
* unconditionally re-enable interrupts
*/
- local_irq_save(flags);
spin_unlock(ap->lock);
/* If the device fell off, no sense in issuing commands */
@@ -252,7 +250,6 @@ static unsigned int sas_ata_qc_issue(str
out:
spin_lock(ap->lock);
- local_irq_restore(flags);
return ret;
}

View File

@ -0,0 +1,42 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Apr 2018 09:55:25 +0200
Subject: [PATCH] [SCSI] qla2xxx: remove irq save in qla2x00_poll()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
[ upstream commit b3a8aa90c46095cbad454eb068bfb5a8eb56d4e3 ]
In commit d2ba5675d899 ("[SCSI] qla2xxx: Disable local-interrupts while
polling for RISC status.") added a local_irq_disable() before invoking
the ->intr_handler callback. The function, which was used in this
callback, did not disable interrupts while acquiring the spin_lock so a
deadlock was possible and this change was one possible solution.
The function in question was qla2300_intr_handler() and is using
spin_lock_irqsave() since commit 43fac4d97a1a ("[SCSI] qla2xxx: Resolve
a performance issue in interrupt").
I checked all other ->intr_handler callbacks and all of them use the
irqsave variant so it is safe to remove the local_irq_save() block now.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/scsi/qla2xxx/qla_inline.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -58,14 +58,12 @@ qla2x00_debounce_register(volatile uint1
static inline void
qla2x00_poll(struct rsp_que *rsp)
{
- unsigned long flags;
struct qla_hw_data *ha = rsp->hw;
- local_irq_save(flags);
+
if (IS_P3P_TYPE(ha))
qla82xx_poll(0, rsp);
else
ha->isp_ops->intr_handler(0, rsp);
- local_irq_restore(flags);
}
static inline uint8_t *

View File

@ -0,0 +1,252 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 27 May 2017 19:02:06 +0200
Subject: kernel/sched/core: add migrate_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
---
include/linux/preempt.h | 23 ++++++++
include/linux/sched.h | 7 ++
include/linux/smp.h | 3 +
kernel/sched/core.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sched/debug.c | 4 +
5 files changed, 165 insertions(+), 2 deletions(-)
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -185,6 +185,22 @@ do { \
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
+#ifdef CONFIG_SMP
+
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+
+int __migrate_disabled(struct task_struct *p);
+
+#else
+#define migrate_disable() barrier()
+#define migrate_enable() barrier()
+static inline int __migrate_disabled(struct task_struct *p)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_PREEMPT
#define preempt_enable() \
do { \
@@ -253,6 +269,13 @@ do { \
#define preempt_enable_notrace() barrier()
#define preemptible() 0
+#define migrate_disable() barrier()
+#define migrate_enable() barrier()
+
+static inline int __migrate_disabled(struct task_struct *p)
+{
+ return 0;
+}
#endif /* CONFIG_PREEMPT_COUNT */
#ifdef MODULE
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -662,6 +662,13 @@ struct task_struct {
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
cpumask_t cpus_mask;
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ int migrate_disable;
+ int migrate_disable_update;
+# ifdef CONFIG_SCHED_DEBUG
+ int migrate_disable_atomic;
+# endif
+#endif
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -202,6 +202,9 @@ static inline int get_boot_cpu_id(void)
#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
#define put_cpu() preempt_enable()
+#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
+#define put_cpu_light() migrate_enable()
+
/*
* Callback to arch code if there's nosmp or maxcpus=0 on the
* boot command line:
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1030,7 +1030,15 @@ void set_cpus_allowed_common(struct task
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+int __migrate_disabled(struct task_struct *p)
+{
+ return p->migrate_disable;
+}
+#endif
+
+static void __do_set_cpus_allowed_tail(struct task_struct *p,
+ const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
@@ -1059,6 +1067,20 @@ void do_set_cpus_allowed(struct task_str
set_curr_task(rq, p);
}
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ if (__migrate_disabled(p)) {
+ lockdep_assert_held(&p->pi_lock);
+
+ cpumask_copy(&p->cpus_mask, new_mask);
+ p->migrate_disable_update = 1;
+ return;
+ }
+#endif
+ __do_set_cpus_allowed_tail(p, new_mask);
+}
+
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
@@ -1117,9 +1139,16 @@ static int __set_cpus_allowed_ptr(struct
}
/* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), new_mask))
+ if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
goto out;
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ if (__migrate_disabled(p)) {
+ p->migrate_disable_update = 1;
+ goto out;
+ }
+#endif
+
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
@@ -7082,3 +7111,100 @@ const u32 sched_prio_to_wmult[40] = {
};
#undef CREATE_TRACE_POINTS
+
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+
+void migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+#endif
+ return;
+ }
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(p->migrate_disable_atomic);
+#endif
+
+ if (p->migrate_disable) {
+ p->migrate_disable++;
+ return;
+ }
+
+ preempt_disable();
+ p->migrate_disable = 1;
+
+ p->cpus_ptr = cpumask_of(smp_processor_id());
+ p->nr_cpus_allowed = 1;
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+#endif
+ return;
+ }
+
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(p->migrate_disable_atomic);
+#endif
+
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ if (p->migrate_disable > 1) {
+ p->migrate_disable--;
+ return;
+ }
+
+ preempt_disable();
+
+ p->cpus_ptr = &p->cpus_mask;
+ p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+ p->migrate_disable = 0;
+
+ if (p->migrate_disable_update) {
+ struct rq *rq;
+ struct rq_flags rf;
+
+ rq = task_rq_lock(p, &rf);
+ update_rq_clock(rq);
+
+ __do_set_cpus_allowed_tail(p, &p->cpus_mask);
+ task_rq_unlock(rq, p, &rf);
+
+ p->migrate_disable_update = 0;
+
+ WARN_ON(smp_processor_id() != task_cpu(p));
+ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+ const struct cpumask *cpu_valid_mask = cpu_active_mask;
+ struct migration_arg arg;
+ unsigned int dest_cpu;
+
+ if (p->flags & PF_KTHREAD) {
+ /*
+ * Kernel threads are allowed on online && !active CPUs
+ */
+ cpu_valid_mask = cpu_online_mask;
+ }
+ dest_cpu = cpumask_any_and(cpu_valid_mask, &p->cpus_mask);
+ arg.task = p;
+ arg.dest_cpu = dest_cpu;
+
+ preempt_enable();
+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+ tlb_migrate_finish(p->mm);
+ return;
+ }
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_enable);
+#endif
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -979,6 +979,10 @@ void proc_sched_show_task(struct task_st
P(dl.runtime);
P(dl.deadline);
}
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ P(migrate_disable);
+#endif
+ P(nr_cpus_allowed);
#undef PN_SCHEDSTAT
#undef PN
#undef __PN

View File

@ -0,0 +1,77 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Oct 2017 17:43:49 +0200
Subject: apparmor: use a locallock instead preempt_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
get_buffers() disables preemption which acts as a lock for the per-CPU
variable. Since we can't disable preemption here on RT, a local_lock is
lock is used in order to remain on the same CPU and not to have more
than one user within the critical section.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
security/apparmor/include/path.h | 19 ++++++++++++++++---
security/apparmor/lsm.c | 2 +-
2 files changed, 17 insertions(+), 4 deletions(-)
--- a/security/apparmor/include/path.h
+++ b/security/apparmor/include/path.h
@@ -40,8 +40,10 @@ struct aa_buffers {
#include <linux/percpu.h>
#include <linux/preempt.h>
+#include <linux/locallock.h>
DECLARE_PER_CPU(struct aa_buffers, aa_buffers);
+DECLARE_LOCAL_IRQ_LOCK(aa_buffers_lock);
#define ASSIGN(FN, A, X, N) ((X) = FN(A, N))
#define EVAL1(FN, A, X) ASSIGN(FN, A, X, 0) /*X = FN(0)*/
@@ -51,7 +53,17 @@ DECLARE_PER_CPU(struct aa_buffers, aa_bu
#define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++)
-#ifdef CONFIG_DEBUG_PREEMPT
+#ifdef CONFIG_PREEMPT_RT_BASE
+static inline void AA_BUG_PREEMPT_ENABLED(const char *s)
+{
+ struct local_irq_lock *lv;
+
+ lv = this_cpu_ptr(&aa_buffers_lock);
+ WARN_ONCE(lv->owner != current,
+ "__get_buffer without aa_buffers_lock\n");
+}
+
+#elif defined(CONFIG_DEBUG_PREEMPT)
#define AA_BUG_PREEMPT_ENABLED(X) AA_BUG(preempt_count() <= 0, X)
#else
#define AA_BUG_PREEMPT_ENABLED(X) /* nop */
@@ -67,14 +79,15 @@ DECLARE_PER_CPU(struct aa_buffers, aa_bu
#define get_buffers(X...) \
do { \
- struct aa_buffers *__cpu_var = get_cpu_ptr(&aa_buffers); \
+ struct aa_buffers *__cpu_var; \
+ __cpu_var = get_locked_ptr(aa_buffers_lock, &aa_buffers); \
__get_buffers(__cpu_var, X); \
} while (0)
#define put_buffers(X, Y...) \
do { \
__put_buffers(X, Y); \
- put_cpu_ptr(&aa_buffers); \
+ put_locked_ptr(aa_buffers_lock, &aa_buffers); \
} while (0)
#endif /* __AA_PATH_H */
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -45,7 +45,7 @@
int apparmor_initialized;
DEFINE_PER_CPU(struct aa_buffers, aa_buffers);
-
+DEFINE_LOCAL_IRQ_LOCK(aa_buffers_lock);
/*
* LSM hook functions

View File

@ -0,0 +1,119 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Thu, 14 May 2015 17:52:17 +0200
Subject: arch/arm64: Add lazy preempt support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
arm64 is missing support for PREEMPT_RT. The main feature which is
lacking is support for lazy preemption. The arch-specific entry code,
thread information structure definitions, and associated data tables
have to be extended to provide this support. Then the Kconfig file has
to be extended to indicate the support is available, and also to
indicate that support for full RT preemption is now available.
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/thread_info.h | 6 +++++-
arch/arm64/kernel/asm-offsets.c | 1 +
arch/arm64/kernel/entry.S | 12 +++++++++---
arch/arm64/kernel/signal.c | 2 +-
5 files changed, 17 insertions(+), 5 deletions(-)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -125,6 +125,7 @@ config ARM64
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
select HAVE_STACKPROTECTOR
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -43,6 +43,7 @@ struct thread_info {
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
};
#define thread_saved_pc(tsk) \
@@ -76,6 +77,7 @@ void arch_release_task_struct(struct tas
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
+#define TIF_NEED_RESCHED_LAZY 6
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -94,6 +96,7 @@ void arch_release_task_struct(struct tas
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
@@ -106,8 +109,9 @@ void arch_release_task_struct(struct tas
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
- _TIF_UPROBE | _TIF_FSCHECK)
+ _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_NOHZ)
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -41,6 +41,7 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -633,11 +633,16 @@ ENDPROC(el1_sync)
#ifdef CONFIG_PREEMPT
ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
- cbnz w24, 1f // preempt count != 0
+ cbnz w24, 2f // preempt count != 0
ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
- tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
- bl el1_preempt
+ tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
+
+ ldr w24, [tsk, #TSK_TI_PREEMPT_LAZY] // get preempt lazy count
+ cbnz w24, 2f // preempt lazy count != 0
+ tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
1:
+ bl el1_preempt
+2:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
@@ -651,6 +656,7 @@ ENDPROC(el1_irq)
1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
+ tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
ret x24
#endif
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -923,7 +923,7 @@ asmlinkage void do_notify_resume(struct
/* Check valid user FS if needed */
addr_limit_user_check();
- if (thread_flags & _TIF_NEED_RESCHED) {
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);

View File

@ -0,0 +1,412 @@
From: Frank Rowand <frank.rowand@am.sony.com>
Date: Mon, 19 Sep 2011 14:51:14 -0700
Subject: arm: Convert arm boot_lock to raw
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The arm boot_lock is used by the secondary processor startup code. The locking
task is the idle thread, which has idle->sched_class == &idle_sched_class.
idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
lock, the attempt to wake it when the lock becomes available will fail:
try_to_wake_up()
...
activate_task()
enqueue_task()
p->sched_class->enqueue_task(rq, p, flags)
Fix by converting boot_lock to a raw spin lock.
Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tony Lindgren <tony@atomide.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org> [Exynos5422 Linaro PM-QA]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mach-exynos/platsmp.c | 12 ++++++------
arch/arm/mach-hisi/platmcpm.c | 22 +++++++++++-----------
arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
arch/arm/mach-prima2/platsmp.c | 10 +++++-----
arch/arm/mach-qcom/platsmp.c | 10 +++++-----
arch/arm/mach-spear/platsmp.c | 10 +++++-----
arch/arm/mach-sti/platsmp.c | 10 +++++-----
arch/arm/plat-versatile/platsmp.c | 10 +++++-----
8 files changed, 47 insertions(+), 47 deletions(-)
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -239,7 +239,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void exynos_secondary_init(unsigned int cpu)
{
@@ -252,8 +252,8 @@ static void exynos_secondary_init(unsign
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
@@ -317,7 +317,7 @@ static int exynos_boot_secondary(unsigne
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -344,7 +344,7 @@ static int exynos_boot_secondary(unsigne
if (timeout == 0) {
printk(KERN_ERR "cpu1 power enable failed");
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return -ETIMEDOUT;
}
}
@@ -390,7 +390,7 @@ static int exynos_boot_secondary(unsigne
* calibrations, then wait for it to finish
*/
fail:
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? ret : 0;
}
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -61,7 +61,7 @@
static void __iomem *sysctrl, *fabric;
static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static u32 fabric_phys_addr;
/*
* [0]: bootwrapper physical address
@@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
return -EINVAL;
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
if (hip04_cpu_table[cluster][cpu])
goto out;
@@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned
out:
hip04_cpu_table[cluster][cpu]++;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 0;
}
@@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
hip04_cpu_table[cluster][cpu]--;
if (hip04_cpu_table[cluster][cpu] == 1) {
/* A power_up request went ahead of us. */
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return;
} else if (hip04_cpu_table[cluster][cpu] > 1) {
pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
@@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l
}
last_man = hip04_cluster_is_down(cluster);
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
if (last_man) {
/* Since it's Cortex A15, disable L2 prefetching. */
asm volatile(
@@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
count = TIMEOUT_MSEC / POLL_MSEC;
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
for (tries = 0; tries < count; tries++) {
if (hip04_cpu_table[cluster][cpu])
goto err;
@@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
if (data & CORE_WFI_STATUS(cpu))
break;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
/* Wait for clean L2 when the whole cluster is down. */
msleep(POLL_MSEC);
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
}
if (tries >= count)
goto err;
@@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l
goto err;
if (hip04_cluster_is_down(cluster))
hip04_set_snoop_filter(cluster, 0);
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 1;
err:
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 0;
}
#endif
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -69,7 +69,7 @@ static const struct omap_smp_config omap
.startup_addr = omap5_secondary_startup,
};
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __iomem *omap4_get_scu_base(void)
{
@@ -177,8 +177,8 @@ static void omap4_secondary_init(unsigne
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -191,7 +191,7 @@ static int omap4_boot_secondary(unsigned
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* Update the AuxCoreBoot0 with boot state for secondary core.
@@ -270,7 +270,7 @@ static int omap4_boot_secondary(unsigned
* Now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return 0;
}
--- a/arch/arm/mach-prima2/platsmp.c
+++ b/arch/arm/mach-prima2/platsmp.c
@@ -22,7 +22,7 @@
static void __iomem *clk_base;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void sirfsoc_secondary_init(unsigned int cpu)
{
@@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsig
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static const struct of_device_id clk_ids[] = {
@@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsign
/* make sure write buffer is drained */
mb();
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsign
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/mach-qcom/platsmp.c
+++ b/arch/arm/mach-qcom/platsmp.c
@@ -46,7 +46,7 @@
extern void secondary_startup_arm(void);
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
#ifdef CONFIG_HOTPLUG_CPU
static void qcom_cpu_die(unsigned int cpu)
@@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static int scss_release_secondary(unsigned int cpu)
@@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* Send the secondary CPU a soft interrupt, thereby causing
@@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return ret;
}
--- a/arch/arm/mach-spear/platsmp.c
+++ b/arch/arm/mach-spear/platsmp.c
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
@@ -47,8 +47,8 @@ static void spear13xx_secondary_init(uns
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsi
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsi
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/mach-sti/platsmp.c
+++ b/arch/arm/mach-sti/platsmp.c
@@ -35,7 +35,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void sti_secondary_init(unsigned int cpu)
{
@@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned i
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned i
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/plat-versatile/platsmp.c
+++ b/arch/arm/plat-versatile/platsmp.c
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
sync_cache_w(&pen_release);
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void versatile_secondary_init(unsigned int cpu)
{
@@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned i
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned in
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* This is really belt and braces; we hold unintended secondary
@@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}

View File

@ -0,0 +1,165 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 1 Dec 2017 10:42:03 +0100
Subject: [PATCH] arm*: disable NEON in kernel mode
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
NEON in kernel mode is used by the crypto algorithms and raid6 code.
While the raid6 code looks okay, the crypto algorithms do not: NEON
is enabled on first invocation and may allocate/free/map memory before
the NEON mode is disabled again.
This needs to be changed until it can be enabled.
On ARM NEON in kernel mode can be simply disabled. on ARM64 it needs to
stay on due to possible EFI callbacks so here I disable each algorithm.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/Kconfig | 2 +-
arch/arm64/crypto/Kconfig | 30 +++++++++++++++---------------
arch/arm64/crypto/crc32-ce-glue.c | 3 ++-
3 files changed, 18 insertions(+), 17 deletions(-)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2169,7 +2169,7 @@ config NEON
config KERNEL_MODE_NEON
bool "Support for NEON in kernel mode"
- depends on NEON && AEABI
+ depends on NEON && AEABI && !PREEMPT_RT_BASE
help
Say Y to include support for NEON in kernel mode.
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -19,43 +19,43 @@ config CRYPTO_SHA512_ARM64
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA1
config CRYPTO_SHA2_ARM64_CE
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA256_ARM64
config CRYPTO_SHA512_ARM64_CE
tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA512_ARM64
config CRYPTO_SHA3_ARM64
tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA3
config CRYPTO_SM3_ARM64_CE
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_GF128MUL
select CRYPTO_AES
@@ -63,7 +63,7 @@ config CRYPTO_GHASH_ARM64_CE
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC_T10DIF
+ depends on KERNEL_MODE_NEON && CRC_T10DIF && !PREEMPT_RT_BASE
select CRYPTO_HASH
config CRYPTO_CRC32_ARM64_CE
@@ -77,13 +77,13 @@ config CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE_CCM
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
@@ -91,7 +91,7 @@ config CRYPTO_AES_ARM64_CE_CCM
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
@@ -99,7 +99,7 @@ config CRYPTO_AES_ARM64_CE_BLK
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64
select CRYPTO_AES
@@ -107,13 +107,13 @@ config CRYPTO_AES_ARM64_NEON_BLK
config CRYPTO_CHACHA20_NEON
tristate "NEON accelerated ChaCha20 symmetric cipher"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES_ARM64
@@ -121,7 +121,7 @@ config CRYPTO_AES_ARM64_BS
config CRYPTO_SPECK_NEON
tristate "NEON accelerated Speck cipher algorithms"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_SPECK
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -208,7 +208,8 @@ static struct shash_alg crc32_pmull_algs
static int __init crc32_pmull_mod_init(void)
{
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && (elf_hwcap & HWCAP_PMULL)) {
crc32_pmull_algs[0].update = crc32_pmull_update;
crc32_pmull_algs[1].update = crc32c_pmull_update;

View File

@ -0,0 +1,174 @@
Subject: arm: Enable highmem for rt
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Feb 2013 11:03:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
fixup highmem for ARM.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/include/asm/switch_to.h | 8 +++++
arch/arm/mm/highmem.c | 56 +++++++++++++++++++++++++++++++++------
include/linux/highmem.h | 1
3 files changed, 57 insertions(+), 8 deletions(-)
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -4,6 +4,13 @@
#include <linux/thread_info.h>
+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
/*
* For v7 SMP cores running a preemptible kernel we may be pre-empted
* during a TLB maintenance operation, so execute an inner-shareable dsb
@@ -26,6 +33,7 @@ extern struct task_struct *__switch_to(s
#define switch_to(prev,next,last) \
do { \
__complete_pending_tlbi(); \
+ switch_kmaps(prev, next); \
last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
} while (0)
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsig
return *ptep;
}
+static unsigned int fixmap_idx(int type)
+{
+ return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+}
+
void *kmap(struct page *page)
{
might_sleep();
@@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap);
void *kmap_atomic(struct page *page)
{
+ pte_t pte = mk_pte(page, kmap_prot);
unsigned int idx;
unsigned long vaddr;
void *kmap;
int type;
- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page)
type = kmap_atomic_idx_push();
- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+ idx = fixmap_idx(type);
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
/*
@@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page)
* in place, so the contained TLB flush ensures the TLB is updated
* with the new mapping.
*/
- set_fixmap_pte(idx, mk_pte(page, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_fixmap_pte(idx, pte);
return (void *)vaddr;
}
@@ -106,10 +115,13 @@ void __kunmap_atomic(void *kvaddr)
if (kvaddr >= (void *)FIXADDR_START) {
type = kmap_atomic_idx();
- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+ idx = fixmap_idx(type);
if (cache_is_vivt())
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr != __fix_to_virt(idx));
#else
@@ -122,28 +134,56 @@ void __kunmap_atomic(void *kvaddr)
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
- preempt_enable();
+ preempt_enable_nort();
}
EXPORT_SYMBOL(__kunmap_atomic);
void *kmap_atomic_pfn(unsigned long pfn)
{
+ pte_t pte = pfn_pte(pfn, kmap_prot);
unsigned long vaddr;
int idx, type;
struct page *page = pfn_to_page(pfn);
- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
type = kmap_atomic_idx_push();
- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+ idx = fixmap_idx(type);
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
#endif
- set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_fixmap_pte(idx, pte);
return (void *)vaddr;
}
+#if defined CONFIG_PREEMPT_RT_FULL
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ int i;
+
+ /*
+ * Clear @prev's kmap_atomic mappings
+ */
+ for (i = 0; i < prev_p->kmap_idx; i++) {
+ int idx = fixmap_idx(i);
+
+ set_fixmap_pte(idx, __pte(0));
+ }
+ /*
+ * Restore @next_p's kmap_atomic mappings
+ */
+ for (i = 0; i < next_p->kmap_idx; i++) {
+ int idx = fixmap_idx(i);
+
+ if (!pte_none(next_p->kmap_pte[i]))
+ set_fixmap_pte(idx, next_p->kmap_pte[i]);
+ }
+}
+#endif
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -8,6 +8,7 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
+#include <linux/sched.h>
#include <asm/cacheflush.h>

View File

@ -0,0 +1,28 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 11 Mar 2013 21:37:27 +0100
Subject: arm/highmem: Flush tlb on unmap
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The tlb should be flushed on unmap and thus make the mapping entry
invalid. This is only done in the non-debug case which does not look
right.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mm/highmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -112,10 +112,10 @@ void __kunmap_atomic(void *kvaddr)
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr != __fix_to_virt(idx));
- set_fixmap_pte(idx, __pte(0));
#else
(void) idx; /* to kill a warning */
#endif
+ set_fixmap_pte(idx, __pte(0));
kmap_atomic_idx_pop();
} else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
/* this address was obtained through kmap_high_get() */

View File

@ -0,0 +1,25 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Dec 2016 17:28:33 +0100
Subject: [PATCH] arm: include definition for cpumask_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
This definition gets pulled in by other files. With the (later) split of
RCU and spinlock.h it won't compile anymore.
The split is done in ("rbtree: don't include the rcu header").
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/include/asm/irq.h | 2 ++
1 file changed, 2 insertions(+)
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -23,6 +23,8 @@
#endif
#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+
struct irqaction;
struct pt_regs;
extern void migrate_irqs(void);

View File

@ -0,0 +1,70 @@
From: Yang Shi <yang.shi@linaro.org>
Date: Thu, 10 Nov 2016 16:17:55 -0800
Subject: [PATCH] arm: kprobe: replace patch_lock to raw lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
When running kprobe on -rt kernel, the below bug is caught:
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931
in_atomic(): 1, irqs_disabled(): 128, pid: 14, name: migration/0
INFO: lockdep is turned off.
irq event stamp: 238
hardirqs last enabled at (237): [<80b5aecc>] _raw_spin_unlock_irqrestore+0x88/0x90
hardirqs last disabled at (238): [<80b56d88>] __schedule+0xec/0x94c
softirqs last enabled at (0): [<80225584>] copy_process.part.5+0x30c/0x1994
softirqs last disabled at (0): [< (null)>] (null)
Preemption disabled at:[<802f2b98>] cpu_stopper_thread+0xc0/0x140
CPU: 0 PID: 14 Comm: migration/0 Tainted: G O 4.8.3-rt2 #1
Hardware name: Freescale LS1021A
[<80212e7c>] (unwind_backtrace) from [<8020cd2c>] (show_stack+0x20/0x24)
[<8020cd2c>] (show_stack) from [<80689e14>] (dump_stack+0xa0/0xcc)
[<80689e14>] (dump_stack) from [<8025a43c>] (___might_sleep+0x1b8/0x2a4)
[<8025a43c>] (___might_sleep) from [<80b5b324>] (rt_spin_lock+0x34/0x74)
[<80b5b324>] (rt_spin_lock) from [<80b5c31c>] (__patch_text_real+0x70/0xe8)
[<80b5c31c>] (__patch_text_real) from [<80b5c3ac>] (patch_text_stop_machine+0x18/0x20)
[<80b5c3ac>] (patch_text_stop_machine) from [<802f2920>] (multi_cpu_stop+0xfc/0x134)
[<802f2920>] (multi_cpu_stop) from [<802f2ba0>] (cpu_stopper_thread+0xc8/0x140)
[<802f2ba0>] (cpu_stopper_thread) from [<802563a4>] (smpboot_thread_fn+0x1a4/0x354)
[<802563a4>] (smpboot_thread_fn) from [<80251d38>] (kthread+0x104/0x11c)
[<80251d38>] (kthread) from [<80207f70>] (ret_from_fork+0x14/0x24)
Since patch_text_stop_machine() is called in stop_machine() which disables IRQ,
sleepable lock should be not used in this atomic context, so replace patch_lock
to raw lock.
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/kernel/patch.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -16,7 +16,7 @@ struct patch {
unsigned int insn;
};
-static DEFINE_SPINLOCK(patch_lock);
+static DEFINE_RAW_SPINLOCK(patch_lock);
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
__acquires(&patch_lock)
@@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *a
return addr;
if (flags)
- spin_lock_irqsave(&patch_lock, *flags);
+ raw_spin_lock_irqsave(&patch_lock, *flags);
else
__acquire(&patch_lock);
@@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fi
clear_fixmap(fixmap);
if (flags)
- spin_unlock_irqrestore(&patch_lock, *flags);
+ raw_spin_unlock_irqrestore(&patch_lock, *flags);
else
__release(&patch_lock);
}

View File

@ -0,0 +1,152 @@
Subject: arm: Add support for lazy preemption
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 31 Oct 2012 12:04:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Implement the arm pieces for lazy preempt.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/Kconfig | 1 +
arch/arm/include/asm/thread_info.h | 8 ++++++--
arch/arm/kernel/asm-offsets.c | 1 +
arch/arm/kernel/entry-armv.S | 19 ++++++++++++++++---
arch/arm/kernel/entry-common.S | 9 +++++++--
arch/arm/kernel/signal.c | 3 ++-
6 files changed, 33 insertions(+), 8 deletions(-)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -89,6 +89,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -49,6 +49,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
@@ -139,7 +140,8 @@ extern int vfp_restore_user_hwstate(stru
#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
+#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
+#define TIF_NEED_RESCHED_LAZY 7
#define TIF_NOHZ 12 /* in adaptive nohz mode */
#define TIF_USING_IWMMXT 17
@@ -149,6 +151,7 @@ extern int vfp_restore_user_hwstate(stru
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
@@ -164,7 +167,8 @@ extern int vfp_restore_user_hwstate(stru
* Change these and you break ASM code in entry-common.S
*/
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_UPROBE)
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ _TIF_NEED_RESCHED_LAZY)
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_THREAD_INFO_H */
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -67,6 +67,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -216,11 +216,18 @@ ENDPROC(__dabt_svc)
#ifdef CONFIG_PREEMPT
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exeption
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -235,8 +242,14 @@ ENDPROC(__irq_svc)
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count != 0
+ beq 1b
+ ret r8 @ go again
+
#endif
__und_fault:
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -56,7 +56,9 @@ saved_pc .req lr
cmp r2, #TASK_SIZE
blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
+ bne fast_work_pending
+ tst r1, #_TIF_SECCOMP
bne fast_work_pending
@@ -93,8 +95,11 @@ ENDPROC(ret_fast_syscall)
cmp r2, #TASK_SIZE
blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
+ bne do_slower_path
+ tst r1, #_TIF_SECCOMP
beq no_work_pending
+do_slower_path:
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -644,7 +644,8 @@ do_work_pending(struct pt_regs *regs, un
*/
trace_hardirqs_off();
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))

View File

@ -0,0 +1,84 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Sep 2013 14:31:54 +0200
Subject: arm/unwind: use a raw_spin_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Mostly unwind is done with irqs enabled however SLUB may call it with
irqs disabled while creating a new SLUB cache.
I had system freeze while loading a module which called
kmem_cache_create() on init. That means SLUB's __slab_alloc() disabled
interrupts and then
->new_slab_objects()
->new_slab()
->setup_object()
->setup_object_debug()
->init_tracking()
->set_track()
->save_stack_trace()
->save_stack_trace_tsk()
->walk_stackframe()
->unwind_frame()
->unwind_find_idx()
=>spin_lock_irqsave(&unwind_lock);
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/kernel/unwind.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -93,7 +93,7 @@ extern const struct unwind_idx __start_u
static const struct unwind_idx *__origin_unwind_idx;
extern const struct unwind_idx __stop_unwind_idx[];
-static DEFINE_SPINLOCK(unwind_lock);
+static DEFINE_RAW_SPINLOCK(unwind_lock);
static LIST_HEAD(unwind_tables);
/* Convert a prel31 symbol to an absolute address */
@@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_f
/* module unwind tables */
struct unwind_table *table;
- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_for_each_entry(table, &unwind_tables, list) {
if (addr >= table->begin_addr &&
addr < table->end_addr) {
@@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_f
break;
}
}
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
}
pr_debug("%s: idx = %p\n", __func__, idx);
@@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(un
tab->begin_addr = text_addr;
tab->end_addr = text_addr + text_size;
- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_add_tail(&tab->list, &unwind_tables);
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
return tab;
}
@@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_tabl
if (!tab)
return;
- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_del(&tab->list);
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
kfree(tab);
}

View File

@ -0,0 +1,72 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 26 Jul 2018 09:13:42 +0200
Subject: [PATCH] arm64: KVM: compute_layout before altenates are applied
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
compute_layout() is invoked as part of an alternative fixup under
stop_machine() and needs a sleeping lock as part of get_random_long().
Invoke compute_layout() before the alternatives are applied.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm64/include/asm/alternative.h | 6 ++++++
arch/arm64/kernel/alternative.c | 1 +
arch/arm64/kvm/va_layout.c | 7 +------
3 files changed, 8 insertions(+), 6 deletions(-)
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -35,6 +35,12 @@ void apply_alternatives_module(void *sta
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
+#ifdef CONFIG_KVM_ARM_HOST
+void kvm_compute_layout(void);
+#else
+static inline void kvm_compute_layout(void) { }
+#endif
+
#define ALTINSTR_ENTRY(feature,cb) \
" .word 661b - .\n" /* label */ \
" .if " __stringify(cb) " == 0\n" \
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -224,6 +224,7 @@ static int __apply_alternatives_multi_st
void __init apply_alternatives_all(void)
{
/* better not try code patching on a live SMP system */
+ kvm_compute_layout();
stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
}
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -33,7 +33,7 @@ static u8 tag_lsb;
static u64 tag_val;
static u64 va_mask;
-static void compute_layout(void)
+__init void kvm_compute_layout(void)
{
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
u64 hyp_va_msb;
@@ -121,8 +121,6 @@ void __init kvm_update_va_mask(struct al
BUG_ON(nr_inst != 5);
- if (!has_vhe() && !va_mask)
- compute_layout();
for (i = 0; i < nr_inst; i++) {
u32 rd, rn, insn, oinsn;
@@ -167,9 +165,6 @@ void kvm_patch_vector_branch(struct alt_
return;
}
- if (!va_mask)
- compute_layout();
-
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
*/

View File

@ -0,0 +1,165 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 25 Jul 2018 14:02:38 +0200
Subject: [PATCH] arm64: fpsimd: use preemp_disable in addition to
local_bh_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The
code disables BH and expects that it is not preemptible. On -RT the
task remains preemptible but remains the same CPU. This may corrupt the
content of the SIMD registers if the task is preempted during
saving/restoring those registers.
Add preempt_disable()/enable() to enfore the required semantic on -RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm64/kernel/fpsimd.c | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -159,6 +159,16 @@ static void sve_free(struct task_struct
__sve_free(task);
}
+static void *sve_free_atomic(struct task_struct *task)
+{
+ void *sve_state = task->thread.sve_state;
+
+ WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+ task->thread.sve_state = NULL;
+ return sve_state;
+}
+
static void change_cpacr(u64 val, u64 mask)
{
u64 cpacr = read_sysreg(CPACR_EL1);
@@ -566,6 +576,7 @@ int sve_set_vector_length(struct task_st
* non-SVE thread.
*/
if (task == current) {
+ preempt_disable();
local_bh_disable();
fpsimd_save();
@@ -576,8 +587,10 @@ int sve_set_vector_length(struct task_st
if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
sve_to_fpsimd(task);
- if (task == current)
+ if (task == current) {
local_bh_enable();
+ preempt_enable();
+ }
/*
* Force reallocation of task SVE state to the correct size
@@ -832,6 +845,7 @@ asmlinkage void do_sve_acc(unsigned int
sve_alloc(current);
+ preempt_disable();
local_bh_disable();
fpsimd_save();
@@ -845,6 +859,7 @@ asmlinkage void do_sve_acc(unsigned int
WARN_ON(1); /* SVE access shouldn't have trapped */
local_bh_enable();
+ preempt_enable();
}
/*
@@ -911,10 +926,12 @@ void fpsimd_thread_switch(struct task_st
void fpsimd_flush_thread(void)
{
int vl, supported_vl;
+ void *mem = NULL;
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
memset(&current->thread.uw.fpsimd_state, 0,
@@ -923,7 +940,7 @@ void fpsimd_flush_thread(void)
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
- sve_free(current);
+ mem = sve_free_atomic(current);
/*
* Reset the task vector length as required.
@@ -959,6 +976,8 @@ void fpsimd_flush_thread(void)
set_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
+ preempt_enable();
+ kfree(mem);
}
/*
@@ -970,9 +989,11 @@ void fpsimd_preserve_current_state(void)
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
fpsimd_save();
local_bh_enable();
+ preempt_enable();
}
/*
@@ -1030,6 +1051,7 @@ void fpsimd_restore_current_state(void)
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1038,6 +1060,7 @@ void fpsimd_restore_current_state(void)
}
local_bh_enable();
+ preempt_enable();
}
/*
@@ -1050,6 +1073,7 @@ void fpsimd_update_current_state(struct
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
current->thread.uw.fpsimd_state = *state;
@@ -1062,6 +1086,7 @@ void fpsimd_update_current_state(struct
clear_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
+ preempt_enable();
}
/*
@@ -1107,6 +1132,7 @@ void kernel_neon_begin(void)
BUG_ON(!may_use_simd());
+ preempt_disable();
local_bh_disable();
__this_cpu_write(kernel_neon_busy, true);
@@ -1120,6 +1146,7 @@ void kernel_neon_begin(void)
preempt_disable();
local_bh_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(kernel_neon_begin);

View File

@ -0,0 +1,92 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 09 Mar 2016 10:51:06 +0100
Subject: arm: at91: do not disable/enable clocks in a row
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Currently the driver will disable the clock and enable it one line later
if it is switching from periodic mode into one shot.
This can be avoided and causes a needless warning on -RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/tcb_clksrc.c | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -126,6 +126,7 @@ static struct clocksource clksrc = {
struct tc_clkevt_device {
struct clock_event_device clkevt;
struct clk *clk;
+ bool clk_enabled;
void __iomem *regs;
};
@@ -143,6 +144,24 @@ static struct tc_clkevt_device *to_tc_cl
*/
static u32 timer_clock;
+static void tc_clk_disable(struct clock_event_device *d)
+{
+ struct tc_clkevt_device *tcd = to_tc_clkevt(d);
+
+ clk_disable(tcd->clk);
+ tcd->clk_enabled = false;
+}
+
+static void tc_clk_enable(struct clock_event_device *d)
+{
+ struct tc_clkevt_device *tcd = to_tc_clkevt(d);
+
+ if (tcd->clk_enabled)
+ return;
+ clk_enable(tcd->clk);
+ tcd->clk_enabled = true;
+}
+
static int tc_shutdown(struct clock_event_device *d)
{
struct tc_clkevt_device *tcd = to_tc_clkevt(d);
@@ -150,8 +169,14 @@ static int tc_shutdown(struct clock_even
writel(0xff, regs + ATMEL_TC_REG(2, IDR));
writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
+ return 0;
+}
+
+static int tc_shutdown_clk_off(struct clock_event_device *d)
+{
+ tc_shutdown(d);
if (!clockevent_state_detached(d))
- clk_disable(tcd->clk);
+ tc_clk_disable(d);
return 0;
}
@@ -164,7 +189,7 @@ static int tc_set_oneshot(struct clock_e
if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
tc_shutdown(d);
- clk_enable(tcd->clk);
+ tc_clk_enable(d);
/* slow clock, count up to RC, then irq and stop */
writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
@@ -186,7 +211,7 @@ static int tc_set_periodic(struct clock_
/* By not making the gentime core emulate periodic mode on top
* of oneshot, we get lower overhead and improved accuracy.
*/
- clk_enable(tcd->clk);
+ tc_clk_enable(d);
/* slow clock, count up to RC, then irq and restart */
writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
@@ -220,7 +245,7 @@ static struct tc_clkevt_device clkevt =
/* Should be lower than at91rm9200's system timer */
.rating = 125,
.set_next_event = tc_next_event,
- .set_state_shutdown = tc_shutdown,
+ .set_state_shutdown = tc_shutdown_clk_off,
.set_state_periodic = tc_set_periodic,
.set_state_oneshot = tc_set_oneshot,
},

View File

@ -0,0 +1,112 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 13 Mar 2018 13:49:16 +0100
Subject: [PATCH] block: blk-mq: move blk_queue_usage_counter_release()
into process context
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
| in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6
| 5 locks held by kworker/u257:6/255:
| #0: ("events_unbound"){.+.+.+}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #1: ((&entry->work)){+.+.+.}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #2: (&shost->scan_mutex){+.+.+.}, at: [<ffffffffa000faa3>] __scsi_add_device+0xa3/0x130 [scsi_mod]
| #3: (&set->tag_list_lock){+.+...}, at: [<ffffffff812f09fa>] blk_mq_init_queue+0x96a/0xa50
| #4: (rcu_read_lock_sched){......}, at: [<ffffffff8132887d>] percpu_ref_kill_and_confirm+0x1d/0x120
| Preemption disabled at:[<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|
| CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1
| Workqueue: events_unbound async_run_entry_fn
| 0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000
| 0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28
| ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000
| Call Trace:
| [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
| [<ffffffff8109aa16>] __might_sleep+0x116/0x190
| [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
| [<ffffffff810b6089>] __wake_up+0x29/0x60
| [<ffffffff812ee06e>] blk_mq_usage_counter_release+0x1e/0x20
| [<ffffffff81328966>] percpu_ref_kill_and_confirm+0x106/0x120
| [<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
| [<ffffffff812f0000>] blk_mq_update_tag_set_depth+0x40/0xd0
| [<ffffffff812f0a1c>] blk_mq_init_queue+0x98c/0xa50
| [<ffffffffa000dcf0>] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod]
| [<ffffffffa000ea35>] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod]
| [<ffffffffa000f494>] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod]
| [<ffffffffa000fb26>] __scsi_add_device+0x126/0x130 [scsi_mod]
| [<ffffffffa013033f>] ata_scsi_scan_host+0xaf/0x200 [libata]
| [<ffffffffa012b5b6>] async_port_probe+0x46/0x60 [libata]
| [<ffffffff810978fb>] async_run_entry_fn+0x3b/0xf0
| [<ffffffff8108ee81>] process_one_work+0x201/0x5e0
percpu_ref_kill_and_confirm() invokes blk_mq_usage_counter_release() in
a rcu-sched region. swait based wake queue can't be used due to
wake_up_all() usage and disabled interrupts in !RT configs (as reported
by Corey Minyard).
The wq_has_sleeper() check has been suggested by Peter Zijlstra.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-core.c | 14 +++++++++++++-
include/linux/blkdev.h | 2 ++
2 files changed, 15 insertions(+), 1 deletion(-)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -959,12 +959,21 @@ void blk_queue_exit(struct request_queue
percpu_ref_put(&q->q_usage_counter);
}
+static void blk_queue_usage_counter_release_swork(struct swork_event *sev)
+{
+ struct request_queue *q =
+ container_of(sev, struct request_queue, mq_pcpu_wake);
+
+ wake_up_all(&q->mq_freeze_wq);
+}
+
static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{
struct request_queue *q =
container_of(ref, struct request_queue, q_usage_counter);
- wake_up_all(&q->mq_freeze_wq);
+ if (wq_has_sleeper(&q->mq_freeze_wq))
+ swork_queue(&q->mq_pcpu_wake);
}
static void blk_rq_timed_out_timer(struct timer_list *t)
@@ -1058,6 +1067,7 @@ struct request_queue *blk_alloc_queue_no
queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
init_waitqueue_head(&q->mq_freeze_wq);
+ INIT_SWORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_swork);
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
@@ -3939,6 +3949,8 @@ int __init blk_dev_init(void)
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
+ BUG_ON(swork_get());
+
request_cachep = kmem_cache_create("blkdev_requests",
sizeof(struct request), 0, SLAB_PANIC, NULL);
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,7 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
+#include <linux/swork.h>
#include <linux/seqlock.h>
#include <linux/u64_stats_sync.h>
@@ -651,6 +652,7 @@ struct request_queue {
#endif
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
+ struct swork_event mq_pcpu_wake;
struct percpu_ref q_usage_counter;
struct list_head all_q_node;

View File

@ -0,0 +1,107 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 29 Jan 2015 15:10:08 +0100
Subject: block/mq: don't complete requests via IPI
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The IPI runs in hardirq context and there are sleeping locks. This patch
moves the completion into a workqueue.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-core.c | 3 +++
block/blk-mq.c | 23 +++++++++++++++++++++++
include/linux/blk-mq.h | 2 +-
include/linux/blkdev.h | 3 +++
4 files changed, 30 insertions(+), 1 deletion(-)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -189,6 +189,9 @@ void blk_rq_init(struct request_queue *q
INIT_LIST_HEAD(&rq->queuelist);
INIT_LIST_HEAD(&rq->timeout_list);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
+#endif
rq->cpu = -1;
rq->q = q;
rq->__sector = (sector_t) -1;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -320,6 +320,9 @@ static struct request *blk_mq_rq_ctx_ini
rq->extra_len = 0;
rq->__deadline = 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
+#endif
INIT_LIST_HEAD(&rq->timeout_list);
rq->timeout = 0;
@@ -545,12 +548,24 @@ void blk_mq_end_request(struct request *
}
EXPORT_SYMBOL(blk_mq_end_request);
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+void __blk_mq_complete_request_remote_work(struct work_struct *work)
+{
+ struct request *rq = container_of(work, struct request, work);
+
+ rq->q->softirq_done_fn(rq);
+}
+
+#else
+
static void __blk_mq_complete_request_remote(void *data)
{
struct request *rq = data;
rq->q->softirq_done_fn(rq);
}
+#endif
static void __blk_mq_complete_request(struct request *rq)
{
@@ -573,10 +588,18 @@ static void __blk_mq_complete_request(st
shared = cpus_share_cache(cpu, ctx->cpu);
if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ /*
+ * We could force QUEUE_FLAG_SAME_FORCE then we would not get in
+ * here. But we could try to invoke it one the CPU like this.
+ */
+ schedule_work_on(ctx->cpu, &rq->work);
+#else
rq->csd.func = __blk_mq_complete_request_remote;
rq->csd.info = rq;
rq->csd.flags = 0;
smp_call_function_single_async(ctx->cpu, &rq->csd);
+#endif
} else {
rq->q->softirq_done_fn(rq);
}
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -247,7 +247,7 @@ static inline u16 blk_mq_unique_tag_to_t
return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}
-
+void __blk_mq_complete_request_remote_work(struct work_struct *work);
int blk_mq_request_started(struct request *rq);
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -151,6 +151,9 @@ enum mq_rq_state {
*/
struct request {
struct request_queue *q;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct work_struct work;
+#endif
struct blk_mq_ctx *mq_ctx;
int cpu;

View File

@ -0,0 +1,52 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: block/mq: do not invoke preempt_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
preempt_disable() and get_cpu() don't play well together with the sleeping
locks it tries to allocate later.
It seems to be enough to replace it with get_cpu_light() and migrate_disable().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -568,7 +568,7 @@ static void __blk_mq_complete_request(st
return;
}
- cpu = get_cpu();
+ cpu = get_cpu_light();
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
shared = cpus_share_cache(cpu, ctx->cpu);
@@ -580,7 +580,7 @@ static void __blk_mq_complete_request(st
} else {
rq->q->softirq_done_fn(rq);
}
- put_cpu();
+ put_cpu_light();
}
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
@@ -1324,14 +1324,14 @@ static void __blk_mq_delay_run_hw_queue(
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
- int cpu = get_cpu();
+ int cpu = get_cpu_light();
if (cpumask_test_cpu(cpu, hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
- put_cpu();
+ put_cpu_light();
return;
}
- put_cpu();
+ put_cpu_light();
}
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,

View File

@ -0,0 +1,30 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 9 Apr 2014 10:37:23 +0200
Subject: block: mq: use cpu_light()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
there is a might sleep splat because get_cpu() disables preemption and
later we grab a lock. As a workaround for this we use get_cpu_light().
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -112,12 +112,12 @@ static inline struct blk_mq_ctx *__blk_m
*/
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
- return __blk_mq_get_ctx(q, get_cpu());
+ return __blk_mq_get_ctx(q, get_cpu_light());
}
static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
- put_cpu();
+ put_cpu_light();
}
struct blk_mq_alloc_data {

View File

@ -0,0 +1,46 @@
Subject: block: Use cpu_chill() for retry loops
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 20 Dec 2012 18:28:26 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Retry loops on RT might loop forever when the modifying side was
preempted. Steven also observed a live lock when there was a
concurrent priority boosting going on.
Use cpu_chill() instead of cpu_relax() to let the system
make progress.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
block/blk-ioc.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -9,6 +9,7 @@
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/sched/task.h>
+#include <linux/delay.h>
#include "blk.h"
@@ -118,7 +119,7 @@ static void ioc_release_fn(struct work_s
spin_unlock(q->queue_lock);
} else {
spin_unlock_irqrestore(&ioc->lock, flags);
- cpu_relax();
+ cpu_chill();
spin_lock_irqsave_nested(&ioc->lock, flags, 1);
}
}
@@ -202,7 +203,7 @@ void put_io_context_active(struct io_con
spin_unlock(icq->q->queue_lock);
} else {
spin_unlock_irqrestore(&ioc->lock, flags);
- cpu_relax();
+ cpu_chill();
goto retry;
}
}

View File

@ -0,0 +1,271 @@
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 9 Jul 2018 17:48:54 -0400
Subject: [PATCH] cgroup/tracing: Move taking of spin lock out of trace event
handlers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
[ Upstream commit e4f8d81c738db6d3ffdabfb8329aa2feaa310699 ]
It is unwise to take spin locks from the handlers of trace events.
Mainly, because they can introduce lockups, because it introduces locks
in places that are normally not tested. Worse yet, because trace events
are tucked away in the include/trace/events/ directory, locks that are
taken there are forgotten about.
As a general rule, I tell people never to take any locks in a trace
event handler.
Several cgroup trace event handlers call cgroup_path() which eventually
takes the kernfs_rename_lock spinlock. This injects the spinlock in the
code without people realizing it. It also can cause issues for the
PREEMPT_RT patch, as the spinlock becomes a mutex, and the trace event
handlers are called with preemption disabled.
By moving the calculation of the cgroup_path() out of the trace event
handlers and into a macro (surrounded by a
trace_cgroup_##type##_enabled()), then we could place the cgroup_path
into a string, and pass that to the trace event. Not only does this
remove the taking of the spinlock out of the trace event handler, but
it also means that the cgroup_path() only needs to be called once (it
is currently called twice, once to get the length to reserver the
buffer for, and once again to get the path itself. Now it only needs to
be done once.
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/trace/events/cgroup.h | 47 +++++++++++++++++++---------------------
kernel/cgroup/cgroup-internal.h | 26 ++++++++++++++++++++++
kernel/cgroup/cgroup-v1.c | 4 +--
kernel/cgroup/cgroup.c | 12 +++++-----
4 files changed, 58 insertions(+), 31 deletions(-)
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -53,24 +53,22 @@ DEFINE_EVENT(cgroup_root, cgroup_remount
DECLARE_EVENT_CLASS(cgroup,
- TP_PROTO(struct cgroup *cgrp),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgrp),
+ TP_ARGS(cgrp, path),
TP_STRUCT__entry(
__field( int, root )
__field( int, id )
__field( int, level )
- __dynamic_array(char, path,
- cgroup_path(cgrp, NULL, 0) + 1)
+ __string( path, path )
),
TP_fast_assign(
__entry->root = cgrp->root->hierarchy_id;
__entry->id = cgrp->id;
__entry->level = cgrp->level;
- cgroup_path(cgrp, __get_dynamic_array(path),
- __get_dynamic_array_len(path));
+ __assign_str(path, path);
),
TP_printk("root=%d id=%d level=%d path=%s",
@@ -79,45 +77,45 @@ DECLARE_EVENT_CLASS(cgroup,
DEFINE_EVENT(cgroup, cgroup_mkdir,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_rmdir,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_release,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_rename,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DECLARE_EVENT_CLASS(cgroup_migrate,
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup),
- TP_ARGS(dst_cgrp, task, threadgroup),
+ TP_ARGS(dst_cgrp, path, task, threadgroup),
TP_STRUCT__entry(
__field( int, dst_root )
__field( int, dst_id )
__field( int, dst_level )
- __dynamic_array(char, dst_path,
- cgroup_path(dst_cgrp, NULL, 0) + 1)
__field( int, pid )
+ __string( dst_path, path )
__string( comm, task->comm )
),
@@ -125,8 +123,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
__entry->dst_root = dst_cgrp->root->hierarchy_id;
__entry->dst_id = dst_cgrp->id;
__entry->dst_level = dst_cgrp->level;
- cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
- __get_dynamic_array_len(dst_path));
+ __assign_str(dst_path, path);
__entry->pid = task->pid;
__assign_str(comm, task->comm);
),
@@ -138,16 +135,18 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup),
- TP_ARGS(dst_cgrp, task, threadgroup)
+ TP_ARGS(dst_cgrp, path, task, threadgroup)
);
DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup),
- TP_ARGS(dst_cgrp, task, threadgroup)
+ TP_ARGS(dst_cgrp, path, task, threadgroup)
);
#endif /* _TRACE_CGROUP_H */
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -8,6 +8,32 @@
#include <linux/list.h>
#include <linux/refcount.h>
+#define TRACE_CGROUP_PATH_LEN 1024
+extern spinlock_t trace_cgroup_path_lock;
+extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
+/*
+ * cgroup_path() takes a spin lock. It is good practice not to take
+ * spin locks within trace point handlers, as they are mostly hidden
+ * from normal view. As cgroup_path() can take the kernfs_rename_lock
+ * spin lock, it is best to not call that function from the trace event
+ * handler.
+ *
+ * Note: trace_cgroup_##type##_enabled() is a static branch that will only
+ * be set when the trace event is enabled.
+ */
+#define TRACE_CGROUP_PATH(type, cgrp, ...) \
+ do { \
+ if (trace_cgroup_##type##_enabled()) { \
+ spin_lock(&trace_cgroup_path_lock); \
+ cgroup_path(cgrp, trace_cgroup_path, \
+ TRACE_CGROUP_PATH_LEN); \
+ trace_cgroup_##type(cgrp, trace_cgroup_path, \
+ ##__VA_ARGS__); \
+ spin_unlock(&trace_cgroup_path_lock); \
+ } \
+ } while (0)
+
/*
* A cgroup can be associated with multiple css_sets as different tasks may
* belong to different cgroups on different hierarchies. In the other
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup
if (task) {
ret = cgroup_migrate(task, false, &mgctx);
if (!ret)
- trace_cgroup_transfer_tasks(to, task, false);
+ TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
put_task_struct(task);
}
} while (task && !ret);
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
- trace_cgroup_rename(cgrp);
+ TRACE_CGROUP_PATH(rename, cgrp);
mutex_unlock(&cgroup_mutex);
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif
+DEFINE_SPINLOCK(trace_cgroup_path_lock);
+char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
/*
* Protects cgroup_idr and css_idr so that IDs can be released without
* grabbing cgroup_mutex.
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *ds
cgroup_migrate_finish(&mgctx);
if (!ret)
- trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+ TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
return ret;
}
@@ -4634,7 +4637,7 @@ static void css_release_work_fn(struct w
struct cgroup *tcgrp;
/* cgroup release path */
- trace_cgroup_release(cgrp);
+ TRACE_CGROUP_PATH(release, cgrp);
if (cgroup_on_dfl(cgrp))
cgroup_rstat_flush(cgrp);
@@ -4977,7 +4980,7 @@ int cgroup_mkdir(struct kernfs_node *par
if (ret)
goto out_destroy;
- trace_cgroup_mkdir(cgrp);
+ TRACE_CGROUP_PATH(mkdir, cgrp);
/* let's create and online css's */
kernfs_activate(kn);
@@ -5165,9 +5168,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
return 0;
ret = cgroup_destroy_locked(cgrp);
-
if (!ret)
- trace_cgroup_rmdir(cgrp);
+ TRACE_CGROUP_PATH(rmdir, cgrp);
cgroup_kn_unlock(kn);
return ret;

View File

@ -0,0 +1,44 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Jul 2018 18:19:48 +0200
Subject: [PATCH] cgroup: use irqsave in cgroup_rstat_flush_locked()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock
either with spin_lock_irq() or spin_lock_irqsave().
cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which
is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated()
in IRQ context and therefore requires _irqsave() locking suffix in
cgroup_rstat_flush_locked().
Since there is no difference between spin_lock_t and raw_spin_lock_t
on !RT lockdep does not complain here. On RT lockdep complains because
the interrupts were not disabled here and a deadlock is possible.
Acquire the raw_spin_lock_t with disabled interrupts.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/cgroup/rstat.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -157,8 +157,9 @@ static void cgroup_rstat_flush_locked(st
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
cpu);
struct cgroup *pos = NULL;
+ unsigned long flags;
- raw_spin_lock(cpu_lock);
+ raw_spin_lock_irqsave(cpu_lock, flags);
while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
struct cgroup_subsys_state *css;
@@ -170,7 +171,7 @@ static void cgroup_rstat_flush_locked(st
css->ss->css_rstat_flush(css, cpu);
rcu_read_unlock();
}
- raw_spin_unlock(cpu_lock);
+ raw_spin_unlock_irqrestore(cpu_lock, flags);
/* if @may_sleep, play nice and yield if necessary */
if (may_sleep && (need_resched() ||

View File

@ -0,0 +1,87 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 13 Feb 2015 15:52:24 +0100
Subject: cgroups: use simple wait in css_release()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
To avoid:
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
|in_atomic(): 1, irqs_disabled(): 0, pid: 92, name: rcuc/11
|2 locks held by rcuc/11/92:
| #0: (rcu_callback){......}, at: [<ffffffff810e037e>] rcu_cpu_kthread+0x3de/0x940
| #1: (rcu_read_lock_sched){......}, at: [<ffffffff81328390>] percpu_ref_call_confirm_rcu+0x0/0xd0
|Preemption disabled at:[<ffffffff813284e2>] percpu_ref_switch_to_atomic_rcu+0x82/0xc0
|CPU: 11 PID: 92 Comm: rcuc/11 Not tainted 3.18.7-rt0+ #1
| ffff8802398cdf80 ffff880235f0bc28 ffffffff815b3a12 0000000000000000
| 0000000000000000 ffff880235f0bc48 ffffffff8109aa16 0000000000000000
| ffff8802398cdf80 ffff880235f0bc78 ffffffff815b8dd4 000000000000df80
|Call Trace:
| [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
| [<ffffffff8109aa16>] __might_sleep+0x116/0x190
| [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
| [<ffffffff8108d2cd>] queue_work_on+0x6d/0x1d0
| [<ffffffff8110c881>] css_release+0x81/0x90
| [<ffffffff8132844e>] percpu_ref_call_confirm_rcu+0xbe/0xd0
| [<ffffffff813284e2>] percpu_ref_switch_to_atomic_rcu+0x82/0xc0
| [<ffffffff810e03e5>] rcu_cpu_kthread+0x445/0x940
| [<ffffffff81098a2d>] smpboot_thread_fn+0x18d/0x2d0
| [<ffffffff810948d8>] kthread+0xe8/0x100
| [<ffffffff815b9c3c>] ret_from_fork+0x7c/0xb0
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/cgroup-defs.h | 2 ++
kernel/cgroup/cgroup.c | 9 +++++----
2 files changed, 7 insertions(+), 4 deletions(-)
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -20,6 +20,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/workqueue.h>
#include <linux/bpf-cgroup.h>
+#include <linux/swork.h>
#ifdef CONFIG_CGROUPS
@@ -157,6 +158,7 @@ struct cgroup_subsys_state {
/* percpu_ref killing and RCU release */
struct work_struct destroy_work;
+ struct swork_event destroy_swork;
struct rcu_work destroy_rwork;
/*
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4611,10 +4611,10 @@ static void css_free_rwork_fn(struct wor
}
}
-static void css_release_work_fn(struct work_struct *work)
+static void css_release_work_fn(struct swork_event *sev)
{
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ container_of(sev, struct cgroup_subsys_state, destroy_swork);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
@@ -4674,8 +4674,8 @@ static void css_release(struct percpu_re
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
- INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ INIT_SWORK(&css->destroy_swork, css_release_work_fn);
+ swork_queue(&css->destroy_swork);
}
static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5397,6 +5397,7 @@ static int __init cgroup_wq_init(void)
*/
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
BUG_ON(!cgroup_destroy_wq);
+ BUG_ON(swork_get());
return 0;
}
core_initcall(cgroup_wq_init);

View File

@ -0,0 +1,158 @@
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Mon, 8 Mar 2010 18:57:04 +0100
Subject: clocksource: TCLIB: Allow higher clock rates for clock events
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
As default the TCLIB uses the 32KiHz base clock rate for clock events.
Add a compile time selection to allow higher clock resulution.
(fixed up by Sami Pietikäinen <Sami.Pietikainen@wapice.com>)
Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/clocksource/tcb_clksrc.c | 36 +++++++++++++++++++++---------------
drivers/misc/Kconfig | 12 ++++++++++--
2 files changed, 31 insertions(+), 17 deletions(-)
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -25,8 +25,7 @@
* this 32 bit free-running counter. the second channel is not used.
*
* - The third channel may be used to provide a 16-bit clockevent
- * source, used in either periodic or oneshot mode. This runs
- * at 32 KiHZ, and can handle delays of up to two seconds.
+ * source, used in either periodic or oneshot mode.
*
* A boot clocksource and clockevent source are also currently needed,
* unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
@@ -127,6 +126,7 @@ struct tc_clkevt_device {
struct clock_event_device clkevt;
struct clk *clk;
bool clk_enabled;
+ u32 freq;
void __iomem *regs;
};
@@ -135,13 +135,6 @@ static struct tc_clkevt_device *to_tc_cl
return container_of(clkevt, struct tc_clkevt_device, clkevt);
}
-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
- * because using one of the divided clocks would usually mean the
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
- *
- * A divided clock could be good for high resolution timers, since
- * 30.5 usec resolution can seem "low".
- */
static u32 timer_clock;
static void tc_clk_disable(struct clock_event_device *d)
@@ -191,7 +184,7 @@ static int tc_set_oneshot(struct clock_e
tc_clk_enable(d);
- /* slow clock, count up to RC, then irq and stop */
+ /* count up to RC, then irq and stop */
writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -213,10 +206,10 @@ static int tc_set_periodic(struct clock_
*/
tc_clk_enable(d);
- /* slow clock, count up to RC, then irq and restart */
+ /* count up to RC, then irq and restart */
writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
regs + ATMEL_TC_REG(2, CMR));
- writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
+ writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
/* Enable clock and interrupts on RC compare */
writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -243,7 +236,11 @@ static struct tc_clkevt_device clkevt =
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
/* Should be lower than at91rm9200's system timer */
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
.rating = 125,
+#else
+ .rating = 200,
+#endif
.set_next_event = tc_next_event,
.set_state_shutdown = tc_shutdown_clk_off,
.set_state_periodic = tc_set_periodic,
@@ -265,8 +262,9 @@ static irqreturn_t ch2_irq(int irq, void
return IRQ_NONE;
}
-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
{
+ unsigned divisor = atmel_tc_divisors[divisor_idx];
int ret;
struct clk *t2_clk = tc->clk[2];
int irq = tc->irq[2];
@@ -287,7 +285,11 @@ static int __init setup_clkevents(struct
clkevt.regs = tc->regs;
clkevt.clk = t2_clk;
- timer_clock = clk32k_divisor_idx;
+ timer_clock = divisor_idx;
+ if (!divisor)
+ clkevt.freq = 32768;
+ else
+ clkevt.freq = clk_get_rate(t2_clk) / divisor;
clkevt.clkevt.cpumask = cpumask_of(0);
@@ -298,7 +300,7 @@ static int __init setup_clkevents(struct
return ret;
}
- clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
+ clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
return ret;
}
@@ -435,7 +437,11 @@ static int __init tcb_clksrc_init(void)
goto err_disable_t1;
/* channel 2: periodic and oneshot timer support */
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
ret = setup_clkevents(tc, clk32k_divisor_idx);
+#else
+ ret = setup_clkevents(tc, best_divisor_idx);
+#endif
if (ret)
goto err_unregister_clksrc;
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -69,8 +69,7 @@ config ATMEL_TCB_CLKSRC
are combined to make a single 32-bit timer.
When GENERIC_CLOCKEVENTS is defined, the third timer channel
- may be used as a clock event device supporting oneshot mode
- (delays of up to two seconds) based on the 32 KiHz clock.
+ may be used as a clock event device supporting oneshot mode.
config ATMEL_TCB_CLKSRC_BLOCK
int
@@ -83,6 +82,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
TC can be used for other purposes, such as PWM generation and
interval timing.
+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ bool "TC Block use 32 KiHz clock"
+ depends on ATMEL_TCB_CLKSRC
+ default y
+ help
+ Select this to use 32 KiHz base clock rate as TC block clock
+ source for clock events.
+
+
config DUMMY_IRQ
tristate "Dummy IRQ handler"
default n

View File

@ -0,0 +1,336 @@
Subject: completion: Use simple wait queues
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 11 Jan 2013 11:23:51 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Completions have no long lasting callbacks and therefor do not need
the complex waitqueue variant. Use simple waitqueues which reduces the
contention on the waitqueue lock.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 2 -
drivers/usb/gadget/function/f_fs.c | 2 -
drivers/usb/gadget/legacy/inode.c | 4 +-
include/linux/completion.h | 8 ++--
include/linux/suspend.h | 6 +++
include/linux/swait.h | 1
kernel/power/hibernate.c | 7 ++++
kernel/power/suspend.c | 4 ++
kernel/sched/completion.c | 34 ++++++++++----------
kernel/sched/core.c | 10 ++++-
kernel/sched/swait.c | 19 +++++++++++
11 files changed, 70 insertions(+), 27 deletions(-)
--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
@@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ez
while (!ctx->done.done && msecs--)
udelay(1000);
} else {
- wait_event_interruptible(ctx->done.wait,
+ swait_event_interruptible(ctx->done.wait,
ctx->done.done);
}
break;
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1633,7 +1633,7 @@ static void ffs_data_put(struct ffs_data
pr_info("%s(): freeing\n", __func__);
ffs_data_clear(ffs);
BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
- waitqueue_active(&ffs->ep0req_completion.wait) ||
+ swait_active(&ffs->ep0req_completion.wait) ||
waitqueue_active(&ffs->wait));
destroy_workqueue(ffs->io_completion_wq);
kfree(ffs->dev_name);
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -343,7 +343,7 @@ ep_io (struct ep_data *epdata, void *buf
spin_unlock_irq (&epdata->dev->lock);
if (likely (value == 0)) {
- value = wait_event_interruptible (done.wait, done.done);
+ value = swait_event_interruptible (done.wait, done.done);
if (value != 0) {
spin_lock_irq (&epdata->dev->lock);
if (likely (epdata->ep != NULL)) {
@@ -352,7 +352,7 @@ ep_io (struct ep_data *epdata, void *buf
usb_ep_dequeue (epdata->ep, epdata->req);
spin_unlock_irq (&epdata->dev->lock);
- wait_event (done.wait, done.done);
+ swait_event (done.wait, done.done);
if (epdata->status == -ECONNRESET)
epdata->status = -EINTR;
} else {
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -9,7 +9,7 @@
* See kernel/sched/completion.c for details.
*/
-#include <linux/wait.h>
+#include <linux/swait.h>
/*
* struct completion - structure used to maintain state for a "completion"
@@ -25,7 +25,7 @@
*/
struct completion {
unsigned int done;
- wait_queue_head_t wait;
+ struct swait_queue_head wait;
};
#define init_completion_map(x, m) __init_completion(x)
@@ -34,7 +34,7 @@ static inline void complete_acquire(stru
static inline void complete_release(struct completion *x) {}
#define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+ { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
#define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
(*({ init_completion_map(&(work), &(map)); &(work); }))
@@ -85,7 +85,7 @@ static inline void complete_release(stru
static inline void __init_completion(struct completion *x)
{
x->done = 0;
- init_waitqueue_head(&x->wait);
+ init_swait_queue_head(&x->wait);
}
/**
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -196,6 +196,12 @@ struct platform_s2idle_ops {
void (*end)(void);
};
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+extern bool pm_in_action;
+#else
+# define pm_in_action false
+#endif
+
#ifdef CONFIG_SUSPEND
extern suspend_state_t mem_sleep_current;
extern suspend_state_t mem_sleep_default;
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -160,6 +160,7 @@ static inline bool swq_has_sleeper(struc
extern void swake_up(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -679,6 +679,10 @@ static int load_image_and_restore(void)
return error;
}
+#ifndef CONFIG_SUSPEND
+bool pm_in_action;
+#endif
+
/**
* hibernate - Carry out system hibernation, including saving the image.
*/
@@ -692,6 +696,8 @@ int hibernate(void)
return -EPERM;
}
+ pm_in_action = true;
+
lock_system_sleep();
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -770,6 +776,7 @@ int hibernate(void)
atomic_inc(&snapshot_device_available);
Unlock:
unlock_system_sleep();
+ pm_in_action = false;
pr_info("hibernation exit\n");
return error;
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -594,6 +594,8 @@ static int enter_state(suspend_state_t s
return error;
}
+bool pm_in_action;
+
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
@@ -608,6 +610,7 @@ int pm_suspend(suspend_state_t state)
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;
+ pm_in_action = true;
pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);
error = enter_state(state);
if (error) {
@@ -617,6 +620,7 @@ int pm_suspend(suspend_state_t state)
suspend_stats.success++;
}
pr_info("suspend exit\n");
+ pm_in_action = false;
return error;
}
EXPORT_SYMBOL(pm_suspend);
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -29,12 +29,12 @@ void complete(struct completion *x)
{
unsigned long flags;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (x->done != UINT_MAX)
x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ swake_up_locked(&x->wait);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
@@ -58,10 +58,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done = UINT_MAX;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ swake_up_all_locked(&x->wait);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
@@ -70,20 +70,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
+ DECLARE_SWAITQUEUE(wait);
- __add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
+ __prepare_to_swait(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
+ __finish_swait(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -100,9 +100,9 @@ static inline long __sched
complete_acquire(x);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
complete_release(x);
@@ -291,12 +291,12 @@ bool try_wait_for_completion(struct comp
if (!READ_ONCE(x->done))
return false;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = false;
else if (x->done != UINT_MAX)
x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -322,8 +322,8 @@ bool completion_done(struct completion *
* otherwise we can end up freeing the completion before complete()
* is done referencing it.
*/
- spin_lock_irqsave(&x->wait.lock, flags);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return true;
}
EXPORT_SYMBOL(completion_done);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7130,7 +7130,10 @@ void migrate_disable(void)
return;
}
#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
#endif
if (p->migrate_disable) {
@@ -7160,7 +7163,10 @@ void migrate_enable(void)
}
#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
#endif
WARN_ON_ONCE(p->migrate_disable <= 0);
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -32,6 +32,25 @@ void swake_up_locked(struct swait_queue_
}
EXPORT_SYMBOL(swake_up_locked);
+void swake_up_all_locked(struct swait_queue_head *q)
+{
+ struct swait_queue *curr;
+ int wakes = 0;
+
+ while (!list_empty(&q->task_list)) {
+
+ curr = list_first_entry(&q->task_list, typeof(*curr),
+ task_list);
+ wake_up_process(curr->task);
+ list_del_init(&curr->task_list);
+ wakes++;
+ }
+ if (pm_in_action)
+ return;
+ WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
+}
+EXPORT_SYMBOL(swake_up_all_locked);
+
void swake_up(struct swait_queue_head *q)
{
unsigned long flags;

View File

@ -0,0 +1,27 @@
Subject: sched: Use the proper LOCK_OFFSET for cond_resched()
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 22:51:33 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
RT does not increment preempt count when a 'sleeping' spinlock is
locked. Update PREEMPT_LOCK_OFFSET for that case.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/preempt.h | 4 ++++
1 file changed, 4 insertions(+)
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -118,7 +118,11 @@
/*
* The preempt_count offset after spin_lock()
*/
+#if !defined(CONFIG_PREEMPT_RT_FULL)
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
+#else
+#define PREEMPT_LOCK_OFFSET 0
+#endif
/*
* The preempt_count offset needed for things like:

View File

@ -0,0 +1,68 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sun, 16 Oct 2016 05:11:54 +0200
Subject: [PATCH] connector/cn_proc: Protect send_msg() with a local lock
on RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931
|in_atomic(): 1, irqs_disabled(): 0, pid: 31807, name: sleep
|Preemption disabled at:[<ffffffff8148019b>] proc_exit_connector+0xbb/0x140
|
|CPU: 4 PID: 31807 Comm: sleep Tainted: G W E 4.8.0-rt11-rt #106
|Call Trace:
| [<ffffffff813436cd>] dump_stack+0x65/0x88
| [<ffffffff8109c425>] ___might_sleep+0xf5/0x180
| [<ffffffff816406b0>] __rt_spin_lock+0x20/0x50
| [<ffffffff81640978>] rt_read_lock+0x28/0x30
| [<ffffffff8156e209>] netlink_broadcast_filtered+0x49/0x3f0
| [<ffffffff81522621>] ? __kmalloc_reserve.isra.33+0x31/0x90
| [<ffffffff8156e5cd>] netlink_broadcast+0x1d/0x20
| [<ffffffff8147f57a>] cn_netlink_send_mult+0x19a/0x1f0
| [<ffffffff8147f5eb>] cn_netlink_send+0x1b/0x20
| [<ffffffff814801d8>] proc_exit_connector+0xf8/0x140
| [<ffffffff81077f71>] do_exit+0x5d1/0xba0
| [<ffffffff810785cc>] do_group_exit+0x4c/0xc0
| [<ffffffff81078654>] SyS_exit_group+0x14/0x20
| [<ffffffff81640a72>] entry_SYSCALL_64_fastpath+0x1a/0xa4
Since ab8ed951080e ("connector: fix out-of-order cn_proc netlink message
delivery") which is v4.7-rc6.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/connector/cn_proc.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -32,6 +32,7 @@
#include <linux/pid_namespace.h>
#include <linux/cn_proc.h>
+#include <linux/locallock.h>
/*
* Size of a cn_msg followed by a proc_event structure. Since the
@@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = {
/* proc_event_counts is used as the sequence number of the netlink message */
static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
+static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock);
static inline void send_msg(struct cn_msg *msg)
{
- preempt_disable();
+ local_lock(send_msg_lock);
msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
((struct proc_event *)msg->data)->cpu = smp_processor_id();
@@ -70,7 +72,7 @@ static inline void send_msg(struct cn_ms
*/
cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
- preempt_enable();
+ local_unlock(send_msg_lock);
}
void proc_fork_connector(struct task_struct *task)

View File

@ -0,0 +1,111 @@
Subject: cpu/hotplug: Implement CPU pinning
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Jul 2017 17:31:20 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 1 +
kernel/cpu.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -671,6 +671,7 @@ struct task_struct {
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
int migrate_disable;
int migrate_disable_update;
+ int pinned_on_cpu;
# ifdef CONFIG_SCHED_DEBUG
int migrate_disable_atomic;
# endif
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -73,6 +73,11 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_s
.fail = CPUHP_INVALID,
};
+#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \
+ __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock);
+#endif
+
#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lockdep_map cpuhp_state_up_map =
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
@@ -287,7 +292,30 @@ static int cpu_hotplug_disabled;
*/
void pin_current_cpu(void)
{
+ struct rt_rw_lock *cpuhp_pin;
+ unsigned int cpu;
+ int ret;
+again:
+ cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
+ ret = __read_rt_trylock(cpuhp_pin);
+ if (ret) {
+ current->pinned_on_cpu = smp_processor_id();
+ return;
+ }
+ cpu = smp_processor_id();
+ preempt_lazy_enable();
+ preempt_enable();
+
+ __read_rt_lock(cpuhp_pin);
+
+ preempt_disable();
+ preempt_lazy_disable();
+ if (cpu != smp_processor_id()) {
+ __read_rt_unlock(cpuhp_pin);
+ goto again;
+ }
+ current->pinned_on_cpu = cpu;
}
/**
@@ -295,6 +323,13 @@ void pin_current_cpu(void)
*/
void unpin_current_cpu(void)
{
+ struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
+
+ if (WARN_ON(current->pinned_on_cpu != smp_processor_id()))
+ cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu);
+
+ current->pinned_on_cpu = -1;
+ __read_rt_unlock(cpuhp_pin);
}
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
@@ -764,6 +799,7 @@ static int take_cpu_down(void *_param)
static int takedown_cpu(unsigned int cpu)
{
+ struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu);
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err;
@@ -777,11 +813,14 @@ static int takedown_cpu(unsigned int cpu
*/
irq_lock_sparse();
+ __write_rt_lock(cpuhp_pin);
+
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
+ __write_rt_unlock(cpuhp_pin);
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
@@ -800,6 +839,7 @@ static int takedown_cpu(unsigned int cpu
wait_for_ap_thread(st, false);
BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
+ __write_rt_unlock(cpuhp_pin);
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();

View File

@ -0,0 +1,33 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 9 Apr 2015 15:23:01 +0200
Subject: cpufreq: drop K8's driver from beeing selected
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Ralf posted a picture of a backtrace from
| powernowk8_target_fn() -> transition_frequency_fidvid() and then at the
| end:
| 932 policy = cpufreq_cpu_get(smp_processor_id());
| 933 cpufreq_cpu_put(policy);
crashing the system on -RT. I assumed that policy was a NULL pointer but
was rulled out. Since Ralf can't do any more investigations on this and
I have no machine with this, I simply switch it off.
Reported-by: Ralf Mardorf <ralf.mardorf@alice-dsl.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/cpufreq/Kconfig.x86 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -125,7 +125,7 @@ config X86_POWERNOW_K7_ACPI
config X86_POWERNOW_K8
tristate "AMD Opteron/Athlon64 PowerNow!"
- depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
+ depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
help
This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
Support for K10 and newer processors is now in acpi-cpufreq.

View File

@ -0,0 +1,68 @@
Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 14 Dec 2011 01:03:49 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
There are "valid" GFP_ATOMIC allocations such as
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931
|in_atomic(): 1, irqs_disabled(): 0, pid: 2130, name: tar
|1 lock held by tar/2130:
| #0: (&mm->mmap_sem){++++++}, at: [<ffffffff811d4e89>] SyS_brk+0x39/0x190
|Preemption disabled at:[<ffffffff81063048>] flush_tlb_mm_range+0x28/0x350
|
|CPU: 1 PID: 2130 Comm: tar Tainted: G W 4.8.2-rt2+ #747
|Call Trace:
| [<ffffffff814d52dc>] dump_stack+0x86/0xca
| [<ffffffff810a26fb>] ___might_sleep+0x14b/0x240
| [<ffffffff819bc1d4>] rt_spin_lock+0x24/0x60
| [<ffffffff81194fba>] get_page_from_freelist+0x83a/0x11b0
| [<ffffffff81195e8b>] __alloc_pages_nodemask+0x15b/0x1190
| [<ffffffff811f0b81>] alloc_pages_current+0xa1/0x1f0
| [<ffffffff811f7df5>] new_slab+0x3e5/0x690
| [<ffffffff811fb0d5>] ___slab_alloc+0x495/0x660
| [<ffffffff811fb311>] __slab_alloc.isra.79+0x71/0xc0
| [<ffffffff811fb447>] __kmalloc_node+0xe7/0x240
| [<ffffffff814d4ee0>] alloc_cpumask_var_node+0x20/0x50
| [<ffffffff814d4f3e>] alloc_cpumask_var+0xe/0x10
| [<ffffffff810430c1>] native_send_call_func_ipi+0x21/0x130
| [<ffffffff8111c13f>] smp_call_function_many+0x22f/0x370
| [<ffffffff81062b64>] native_flush_tlb_others+0x1a4/0x3a0
| [<ffffffff8106309b>] flush_tlb_mm_range+0x7b/0x350
| [<ffffffff811c88e2>] tlb_flush_mmu_tlbonly+0x62/0xd0
| [<ffffffff811c9af4>] tlb_finish_mmu+0x14/0x50
| [<ffffffff811d1c84>] unmap_region+0xe4/0x110
| [<ffffffff811d3db3>] do_munmap+0x293/0x470
| [<ffffffff811d4f8c>] SyS_brk+0x13c/0x190
| [<ffffffff810032e2>] do_fast_syscall_32+0xb2/0x2f0
| [<ffffffff819be181>] entry_SYSENTER_compat+0x51/0x60
which forbid allocations at run-time.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/Kconfig | 2 +-
lib/Kconfig | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -938,7 +938,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL
- select CPUMASK_OFFSTACK
+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
---help---
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -434,6 +434,7 @@ config CHECK_SIGNATURE
config CPUMASK_OFFSTACK
bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+ depends on !PREEMPT_RT_FULL
help
Use dynamic allocation for cpumask_var_t, instead of putting
them on the stack. This is a bit more expensive, but avoids

View File

@ -0,0 +1,287 @@
From: Mike Galbraith <efault@gmx.de>
Date: Sun, 8 Jan 2017 09:32:25 +0100
Subject: [PATCH] cpuset: Convert callback_lock to raw_spinlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The two commits below add up to a cpuset might_sleep() splat for RT:
8447a0fee974 cpuset: convert callback_mutex to a spinlock
344736f29b35 cpuset: simplify cpuset_node_allowed API
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:995
in_atomic(): 0, irqs_disabled(): 1, pid: 11718, name: cset
CPU: 135 PID: 11718 Comm: cset Tainted: G E 4.10.0-rt1-rt #4
Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0056.R01.1409242327 09/24/2014
Call Trace:
? dump_stack+0x5c/0x81
? ___might_sleep+0xf4/0x170
? rt_spin_lock+0x1c/0x50
? __cpuset_node_allowed+0x66/0xc0
? ___slab_alloc+0x390/0x570 <disables IRQs>
? anon_vma_fork+0x8f/0x140
? copy_page_range+0x6cf/0xb00
? anon_vma_fork+0x8f/0x140
? __slab_alloc.isra.74+0x5a/0x81
? anon_vma_fork+0x8f/0x140
? kmem_cache_alloc+0x1b5/0x1f0
? anon_vma_fork+0x8f/0x140
? copy_process.part.35+0x1670/0x1ee0
? _do_fork+0xdd/0x3f0
? _do_fork+0xdd/0x3f0
? do_syscall_64+0x61/0x170
? entry_SYSCALL64_slow_path+0x25/0x25
The later ensured that a NUMA box WILL take callback_lock in atomic
context by removing the allocator and reclaim path __GFP_HARDWALL
usage which prevented such contexts from taking callback_mutex.
One option would be to reinstate __GFP_HARDWALL protections for
RT, however, as the 8447a0fee974 changelog states:
The callback_mutex is only used to synchronize reads/updates of cpusets'
flags and cpu/node masks. These operations should always proceed fast so
there's no reason why we can't use a spinlock instead of the mutex.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/cgroup/cpuset.c | 66 ++++++++++++++++++++++++-------------------------
1 file changed, 33 insertions(+), 33 deletions(-)
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -288,7 +288,7 @@ static struct cpuset top_cpuset = {
*/
static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_SPINLOCK(callback_lock);
+static DEFINE_RAW_SPINLOCK(callback_lock);
static struct workqueue_struct *cpuset_migrate_mm_wq;
@@ -922,9 +922,9 @@ static void update_cpumasks_hier(struct
continue;
rcu_read_unlock();
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cpumask_copy(cp->effective_cpus, new_cpus);
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
WARN_ON(!is_in_v2_mode() &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -989,9 +989,9 @@ static int update_cpumask(struct cpuset
if (retval < 0)
return retval;
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
/* use trialcs->cpus_allowed as a temp variable */
update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1175,9 +1175,9 @@ static void update_nodemasks_hier(struct
continue;
rcu_read_unlock();
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cp->effective_mems = *new_mems;
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
WARN_ON(!is_in_v2_mode() &&
!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1245,9 +1245,9 @@ static int update_nodemask(struct cpuset
if (retval < 0)
goto done;
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cs->mems_allowed = trialcs->mems_allowed;
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
/* use trialcs->mems_allowed as a temp variable */
update_nodemasks_hier(cs, &trialcs->mems_allowed);
@@ -1338,9 +1338,9 @@ static int update_flag(cpuset_flagbits_t
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs)));
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cs->flags = trialcs->flags;
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
rebuild_sched_domains_locked();
@@ -1755,7 +1755,7 @@ static int cpuset_common_seq_show(struct
cpuset_filetype_t type = seq_cft(sf)->private;
int ret = 0;
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
switch (type) {
case FILE_CPULIST:
@@ -1774,7 +1774,7 @@ static int cpuset_common_seq_show(struct
ret = -EINVAL;
}
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
return ret;
}
@@ -1989,12 +1989,12 @@ static int cpuset_css_online(struct cgro
cpuset_inc();
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
if (is_in_v2_mode()) {
cpumask_copy(cs->effective_cpus, parent->effective_cpus);
cs->effective_mems = parent->effective_mems;
}
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
goto out_unlock;
@@ -2021,12 +2021,12 @@ static int cpuset_css_online(struct cgro
}
rcu_read_unlock();
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cs->mems_allowed = parent->mems_allowed;
cs->effective_mems = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
out_unlock:
mutex_unlock(&cpuset_mutex);
return 0;
@@ -2065,7 +2065,7 @@ static void cpuset_css_free(struct cgrou
static void cpuset_bind(struct cgroup_subsys_state *root_css)
{
mutex_lock(&cpuset_mutex);
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
if (is_in_v2_mode()) {
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2076,7 +2076,7 @@ static void cpuset_bind(struct cgroup_su
top_cpuset.mems_allowed = top_cpuset.effective_mems;
}
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
mutex_unlock(&cpuset_mutex);
}
@@ -2174,12 +2174,12 @@ hotplug_update_tasks_legacy(struct cpuse
{
bool is_empty;
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cpumask_copy(cs->cpus_allowed, new_cpus);
cpumask_copy(cs->effective_cpus, new_cpus);
cs->mems_allowed = *new_mems;
cs->effective_mems = *new_mems;
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
/*
* Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2216,10 +2216,10 @@ hotplug_update_tasks(struct cpuset *cs,
if (nodes_empty(*new_mems))
*new_mems = parent_cs(cs)->effective_mems;
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
cpumask_copy(cs->effective_cpus, new_cpus);
cs->effective_mems = *new_mems;
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
if (cpus_updated)
update_tasks_cpumask(cs);
@@ -2312,21 +2312,21 @@ static void cpuset_hotplug_workfn(struct
/* synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) {
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
if (!on_dfl)
cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
/* we don't mess with cpumasks of tasks in top_cpuset */
}
/* synchronize mems_allowed to N_MEMORY */
if (mems_updated) {
- spin_lock_irq(&callback_lock);
+ raw_spin_lock_irq(&callback_lock);
if (!on_dfl)
top_cpuset.mems_allowed = new_mems;
top_cpuset.effective_mems = new_mems;
- spin_unlock_irq(&callback_lock);
+ raw_spin_unlock_irq(&callback_lock);
update_tasks_nodemask(&top_cpuset);
}
@@ -2425,11 +2425,11 @@ void cpuset_cpus_allowed(struct task_str
{
unsigned long flags;
- spin_lock_irqsave(&callback_lock, flags);
+ raw_spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock();
guarantee_online_cpus(task_cs(tsk), pmask);
rcu_read_unlock();
- spin_unlock_irqrestore(&callback_lock, flags);
+ raw_spin_unlock_irqrestore(&callback_lock, flags);
}
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2477,11 +2477,11 @@ nodemask_t cpuset_mems_allowed(struct ta
nodemask_t mask;
unsigned long flags;
- spin_lock_irqsave(&callback_lock, flags);
+ raw_spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock();
guarantee_online_mems(task_cs(tsk), &mask);
rcu_read_unlock();
- spin_unlock_irqrestore(&callback_lock, flags);
+ raw_spin_unlock_irqrestore(&callback_lock, flags);
return mask;
}
@@ -2573,14 +2573,14 @@ bool __cpuset_node_allowed(int node, gfp
return true;
/* Not hardwall and node outside mems_allowed: scan up cpusets */
- spin_lock_irqsave(&callback_lock, flags);
+ raw_spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock();
cs = nearest_hardwall_ancestor(task_cs(current));
allowed = node_isset(node, cs->mems_allowed);
rcu_read_unlock();
- spin_unlock_irqrestore(&callback_lock, flags);
+ raw_spin_unlock_irqrestore(&callback_lock, flags);
return allowed;
}

View File

@ -0,0 +1,255 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 21 Feb 2014 17:24:04 +0100
Subject: crypto: Reduce preempt disabled regions, more algos
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Don Estabrook reported
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2462 migrate_enable+0x17b/0x200()
| kernel: WARNING: CPU: 3 PID: 865 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
and his backtrace showed some crypto functions which looked fine.
The problem is the following sequence:
glue_xts_crypt_128bit()
{
blkcipher_walk_virt(); /* normal migrate_disable() */
glue_fpu_begin(); /* get atomic */
while (nbytes) {
__glue_xts_crypt_128bit();
blkcipher_walk_done(); /* with nbytes = 0, migrate_enable()
* while we are atomic */
};
glue_fpu_end() /* no longer atomic */
}
and this is why the counter get out of sync and the warning is printed.
The other problem is that we are non-preemptible between
glue_fpu_begin() and glue_fpu_end() and the latency grows. To fix this,
I shorten the FPU off region and ensure blkcipher_walk_done() is called
with preemption enabled. This might hurt the performance because we now
enable/disable the FPU state more often but we gain lower latency and
the bug is gone.
Reported-by: Don Estabrook <don.estabrook@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/crypto/cast5_avx_glue.c | 21 +++++++++------------
arch/x86/crypto/glue_helper.c | 31 ++++++++++++++++---------------
2 files changed, 25 insertions(+), 27 deletions(-)
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -61,7 +61,7 @@ static inline void cast5_fpu_end(bool fp
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
@@ -76,7 +76,7 @@ static int ecb_crypt(struct skcipher_req
u8 *wsrc = walk.src.virt.addr;
u8 *wdst = walk.dst.virt.addr;
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
@@ -105,10 +105,9 @@ static int ecb_crypt(struct skcipher_req
} while (nbytes >= bsize);
done:
+ cast5_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
-
- cast5_fpu_end(fpu_enabled);
return err;
}
@@ -212,7 +211,7 @@ static int cbc_decrypt(struct skcipher_r
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
@@ -220,12 +219,11 @@ static int cbc_decrypt(struct skcipher_r
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
nbytes = __cbc_decrypt(ctx, &walk);
+ cast5_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
-
- cast5_fpu_end(fpu_enabled);
return err;
}
@@ -292,7 +290,7 @@ static int ctr_crypt(struct skcipher_req
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
@@ -300,13 +298,12 @@ static int ctr_crypt(struct skcipher_req
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
nbytes = __ctr_crypt(&walk, ctx);
+ cast5_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
- cast5_fpu_end(fpu_enabled);
-
if (walk.nbytes) {
ctr_crypt_final(&walk, ctx);
err = skcipher_walk_done(&walk, 0);
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -38,7 +38,7 @@ int glue_ecb_req_128bit(const struct com
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -51,7 +51,7 @@ int glue_ecb_req_128bit(const struct com
unsigned int i;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
+ &walk, false, nbytes);
for (i = 0; i < gctx->num_funcs; i++) {
func_bytes = bsize * gctx->funcs[i].num_blocks;
@@ -69,10 +69,9 @@ int glue_ecb_req_128bit(const struct com
if (nbytes < bsize)
break;
}
+ glue_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
-
- glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
@@ -115,7 +114,7 @@ int glue_cbc_decrypt_req_128bit(const st
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -129,7 +128,7 @@ int glue_cbc_decrypt_req_128bit(const st
u128 last_iv;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
+ &walk, false, nbytes);
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
@@ -161,10 +160,10 @@ int glue_cbc_decrypt_req_128bit(const st
done:
u128_xor(dst, dst, (u128 *)walk.iv);
*(u128 *)walk.iv = last_iv;
+ glue_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
@@ -175,7 +174,7 @@ int glue_ctr_req_128bit(const struct com
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -189,7 +188,7 @@ int glue_ctr_req_128bit(const struct com
le128 ctrblk;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
+ &walk, false, nbytes);
be128_to_le128(&ctrblk, (be128 *)walk.iv);
@@ -213,11 +212,10 @@ int glue_ctr_req_128bit(const struct com
}
le128_to_be128((be128 *)walk.iv, &ctrblk);
+ glue_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
-
if (nbytes) {
le128 ctrblk;
u128 tmp;
@@ -278,7 +276,7 @@ int glue_xts_req_128bit(const struct com
{
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -289,21 +287,24 @@ int glue_xts_req_128bit(const struct com
/* set minimum length to bsize, for tweak_fn */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled,
+ &walk, false,
nbytes < bsize ? bsize : nbytes);
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
while (nbytes) {
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ &walk, fpu_enabled,
+ nbytes < bsize ? bsize : nbytes);
nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+ glue_fpu_end(fpu_enabled);
+ fpu_enabled = false;
err = skcipher_walk_done(&walk, nbytes);
nbytes = walk.nbytes;
}
- glue_fpu_end(fpu_enabled);
-
return err;
}
EXPORT_SYMBOL_GPL(glue_xts_req_128bit);

View File

@ -0,0 +1,79 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 26 Jul 2018 18:52:00 +0200
Subject: [PATCH] crypto: cryptd - add a lock instead
preempt_disable/local_bh_disable
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
cryptd has a per-CPU lock which protected with local_bh_disable() and
preempt_disable().
Add an explicit spin_lock to make the locking context more obvious and
visible to lockdep. Since it is a per-CPU lock, there should be no lock
contention on the actual spinlock.
There is a small race-window where we could be migrated to another CPU
after the cpu_queue has been obtain. This is not a problem because the
actual ressource is protected by the spinlock.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
crypto/cryptd.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -39,6 +39,7 @@ MODULE_PARM_DESC(cryptd_max_cpu_qlen, "S
struct cryptd_cpu_queue {
struct crypto_queue queue;
struct work_struct work;
+ spinlock_t qlock;
};
struct cryptd_queue {
@@ -117,6 +118,7 @@ static int cryptd_init_queue(struct cryp
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
+ spin_lock_init(&cpu_queue->qlock);
}
pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
return 0;
@@ -141,8 +143,10 @@ static int cryptd_enqueue_request(struct
struct cryptd_cpu_queue *cpu_queue;
atomic_t *refcnt;
- cpu = get_cpu();
- cpu_queue = this_cpu_ptr(queue->cpu_queue);
+ cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+ spin_lock_bh(&cpu_queue->qlock);
+ cpu = smp_processor_id();
+
err = crypto_enqueue_request(&cpu_queue->queue, request);
refcnt = crypto_tfm_ctx(request->tfm);
@@ -158,7 +162,7 @@ static int cryptd_enqueue_request(struct
atomic_inc(refcnt);
out_put_cpu:
- put_cpu();
+ spin_unlock_bh(&cpu_queue->qlock);
return err;
}
@@ -174,16 +178,11 @@ static void cryptd_queue_worker(struct w
cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
/*
* Only handle one request at a time to avoid hogging crypto workqueue.
- * preempt_disable/enable is used to prevent being preempted by
- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
- * cryptd_enqueue_request() being accessed from software interrupts.
*/
- local_bh_disable();
- preempt_disable();
+ spin_lock_bh(&cpu_queue->qlock);
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
- preempt_enable();
- local_bh_enable();
+ spin_unlock_bh(&cpu_queue->qlock);
if (!req)
return;

View File

@ -0,0 +1,98 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 30 Nov 2017 13:40:10 +0100
Subject: [PATCH] crypto: limit more FPU-enabled sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Those crypto drivers use SSE/AVX/… for their crypto work and in order to
do so in kernel they need to enable the "FPU" in kernel mode which
disables preemption.
There are two problems with the way they are used:
- the while loop which processes X bytes may create latency spikes and
should be avoided or limited.
- the cipher-walk-next part may allocate/free memory and may use
kmap_atomic().
The whole kernel_fpu_begin()/end() processing isn't probably that cheap.
It most likely makes sense to process as much of those as possible in one
go. The new *_fpu_sched_rt() schedules only if a RT task is pending.
Probably we should measure the performance those ciphers in pure SW
mode and with this optimisations to see if it makes sense to keep them
for RT.
This kernel_fpu_resched() makes the code more preemptible which might hurt
performance.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/crypto/chacha20_glue.c | 9 +++++----
arch/x86/include/asm/fpu/api.h | 1 +
arch/x86/kernel/fpu/core.c | 12 ++++++++++++
3 files changed, 18 insertions(+), 4 deletions(-)
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -81,23 +81,24 @@ static int chacha20_simd(struct skcipher
crypto_chacha20_init(state, ctx, walk.iv);
- kernel_fpu_begin();
-
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+ kernel_fpu_begin();
+
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+ kernel_fpu_end();
err = skcipher_walk_done(&walk,
walk.nbytes % CHACHA20_BLOCK_SIZE);
}
if (walk.nbytes) {
+ kernel_fpu_begin();
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes);
+ kernel_fpu_end();
err = skcipher_walk_done(&walk, 0);
}
- kernel_fpu_end();
-
return err;
}
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -25,6 +25,7 @@ extern void __kernel_fpu_begin(void);
extern void __kernel_fpu_end(void);
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
+extern void kernel_fpu_resched(void);
extern bool irq_fpu_usable(void);
/*
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -137,6 +137,18 @@ void kernel_fpu_end(void)
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
+void kernel_fpu_resched(void)
+{
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+ if (should_resched(PREEMPT_OFFSET)) {
+ kernel_fpu_end();
+ cond_resched();
+ kernel_fpu_begin();
+ }
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
+
/*
* Save the FPU state (mark it for reload if necessary):
*

View File

@ -0,0 +1,77 @@
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 11 Jul 2018 17:14:47 +0200
Subject: [PATCH] crypto: scompress - serialize RT percpu scratch buffer
access with a local lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:974
| in_atomic(): 1, irqs_disabled(): 0, pid: 1401, name: cryptomgr_test
| Preemption disabled at:
| [<ffff00000849941c>] scomp_acomp_comp_decomp+0x34/0x1a0
| CPU: 21 PID: 1401 Comm: cryptomgr_test Tainted: G W 4.16.18-rt9-rt #1
| Hardware name: www.cavium.com crb-1s/crb-1s, BIOS 0.3 Apr 25 2017
| Call trace:
| dump_backtrace+0x0/0x1c8
| show_stack+0x24/0x30
| dump_stack+0xac/0xe8
| ___might_sleep+0x124/0x188
| rt_spin_lock+0x40/0x88
| zip_load_instr+0x44/0x170 [thunderx_zip]
| zip_deflate+0x184/0x378 [thunderx_zip]
| zip_compress+0xb0/0x130 [thunderx_zip]
| zip_scomp_compress+0x48/0x60 [thunderx_zip]
| scomp_acomp_comp_decomp+0xd8/0x1a0
| scomp_acomp_compress+0x24/0x30
| test_acomp+0x15c/0x558
| alg_test_comp+0xc0/0x128
| alg_test.part.6+0x120/0x2c0
| alg_test+0x6c/0xa0
| cryptomgr_test+0x50/0x58
| kthread+0x134/0x138
| ret_from_fork+0x10/0x18
Mainline disables preemption to serialize percpu scratch buffer access,
causing the splat above. Serialize with a local lock for RT instead.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
crypto/scompress.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -24,6 +24,7 @@
#include <linux/cryptouser.h>
#include <net/netlink.h>
#include <linux/scatterlist.h>
+#include <linux/locallock.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/acompress.h>
#include <crypto/internal/scompress.h>
@@ -34,6 +35,7 @@ static void * __percpu *scomp_src_scratc
static void * __percpu *scomp_dst_scratches;
static int scomp_scratch_users;
static DEFINE_MUTEX(scomp_lock);
+static DEFINE_LOCAL_IRQ_LOCK(scomp_scratches_lock);
#ifdef CONFIG_NET
static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
@@ -146,7 +148,7 @@ static int scomp_acomp_comp_decomp(struc
void **tfm_ctx = acomp_tfm_ctx(tfm);
struct crypto_scomp *scomp = *tfm_ctx;
void **ctx = acomp_request_ctx(req);
- const int cpu = get_cpu();
+ const int cpu = local_lock_cpu(scomp_scratches_lock);
u8 *scratch_src = *per_cpu_ptr(scomp_src_scratches, cpu);
u8 *scratch_dst = *per_cpu_ptr(scomp_dst_scratches, cpu);
int ret;
@@ -181,7 +183,7 @@ static int scomp_acomp_comp_decomp(struc
1);
}
out:
- put_cpu();
+ local_unlock_cpu(scomp_scratches_lock);
return ret;
}

View File

@ -0,0 +1,26 @@
Subject: debugobjects: Make RT aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:41:35 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Avoid filling the pool / allocating memory with irqs off().
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
lib/debugobjects.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -374,7 +374,10 @@ static void
struct debug_obj *obj;
unsigned long flags;
- fill_pool();
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (preempt_count() == 0 && !irqs_disabled())
+#endif
+ fill_pool();
db = get_bucket((unsigned long) addr);

View File

@ -0,0 +1,31 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 27 Mar 2018 16:24:15 +0200
Subject: [PATCH] dm rq: remove BUG_ON(!irqs_disabled) check
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
In commit 052189a2ec95 ("dm: remove superfluous irq disablement in
dm_request_fn") the spin_lock_irq() was replaced with spin_lock() + a
check for disabled interrupts. Later the locking part was removed in
commit 2eb6e1e3aa87 ("dm: submit stacked requests in irq enabled
context") but the BUG_ON() check remained.
Since the original purpose for the "are-irqs-off" check is gone (the
->queue_lock has been removed) remove it.
Cc: Keith Busch <keith.busch@intel.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/dm-rq.c | 1 -
1 file changed, 1 deletion(-)
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -688,7 +688,6 @@ static void dm_old_request_fn(struct req
/* Establish tio->ti before queuing work (map_tio_request) */
tio->ti = ti;
kthread_queue_work(&md->kworker, &tio->work);
- BUG_ON(!irqs_disabled());
}
}

View File

@ -0,0 +1,86 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Thu, 31 Mar 2016 04:08:28 +0200
Subject: [PATCH] drivers/block/zram: Replace bit spinlocks with rtmutex
for -rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
They're nondeterministic, and lead to ___might_sleep() splats in -rt.
OTOH, they're a lot less wasteful than an rtmutex per page.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/block/zram/zram_drv.c | 28 ++++++++++++++++++++++++++++
drivers/block/zram/zram_drv.h | 3 +++
2 files changed, 31 insertions(+)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -53,6 +53,30 @@ static size_t huge_class_size;
static void zram_free_page(struct zram *zram, size_t index);
+#ifdef CONFIG_PREEMPT_RT_BASE
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
+{
+ size_t index;
+
+ for (index = 0; index < num_pages; index++)
+ spin_lock_init(&zram->table[index].lock);
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+ spin_lock(&zram->table[index].lock);
+ __set_bit(ZRAM_LOCK, &zram->table[index].value);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+ __clear_bit(ZRAM_LOCK, &zram->table[index].value);
+ spin_unlock(&zram->table[index].lock);
+}
+
+#else
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
+
static void zram_slot_lock(struct zram *zram, u32 index)
{
bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
@@ -62,6 +86,7 @@ static void zram_slot_unlock(struct zram
{
bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
}
+#endif
static inline bool init_done(struct zram *zram)
{
@@ -880,6 +905,8 @@ static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
+
+
static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
@@ -910,6 +937,7 @@ static bool zram_meta_alloc(struct zram
if (!huge_class_size)
huge_class_size = zs_huge_class_size(zram->mem_pool);
+ zram_meta_init_table_locks(zram, num_pages);
return true;
}
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -61,6 +61,9 @@ struct zram_table_entry {
unsigned long element;
};
unsigned long value;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spinlock_t lock;
+#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
ktime_t ac_time;
#endif

View File

@ -0,0 +1,43 @@
Subject: tty/serial/omap: Make the locking RT aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 28 Jul 2011 13:32:57 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The lock is a sleeping lock and local_irq_save() is not the
optimsation we are looking for. Redo it to make it work on -RT and
non-RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/tty/serial/omap-serial.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1307,13 +1307,10 @@ serial_omap_console_write(struct console
pm_runtime_get_sync(up->dev);
- local_irq_save(flags);
- if (up->port.sysrq)
- locked = 0;
- else if (oops_in_progress)
- locked = spin_trylock(&up->port.lock);
+ if (up->port.sysrq || oops_in_progress)
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
else
- spin_lock(&up->port.lock);
+ spin_lock_irqsave(&up->port.lock, flags);
/*
* First save the IER then disable the interrupts
@@ -1342,8 +1339,7 @@ serial_omap_console_write(struct console
pm_runtime_mark_last_busy(up->dev);
pm_runtime_put_autosuspend(up->dev);
if (locked)
- spin_unlock(&up->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&up->port.lock, flags);
}
static int __init

View File

@ -0,0 +1,48 @@
Subject: tty/serial/pl011: Make the locking work on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 08 Jan 2013 21:36:51 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The lock is a sleeping lock and local_irq_save() is not the optimsation
we are looking for. Redo it to make it work on -RT and non-RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/tty/serial/amba-pl011.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2216,13 +2216,19 @@ pl011_console_write(struct console *co,
clk_enable(uap->clk);
- local_irq_save(flags);
+ /*
+ * local_irq_save(flags);
+ *
+ * This local_irq_save() is nonsense. If we come in via sysrq
+ * handling then interrupts are already disabled. Aside of
+ * that the port.sysrq check is racy on SMP regardless.
+ */
if (uap->port.sysrq)
locked = 0;
else if (oops_in_progress)
- locked = spin_trylock(&uap->port.lock);
+ locked = spin_trylock_irqsave(&uap->port.lock, flags);
else
- spin_lock(&uap->port.lock);
+ spin_lock_irqsave(&uap->port.lock, flags);
/*
* First save the CR then disable the interrupts
@@ -2248,8 +2254,7 @@ pl011_console_write(struct console *co,
pl011_write(old_cr, uap, REG_CR);
if (locked)
- spin_unlock(&uap->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&uap->port.lock, flags);
clk_disable(uap->clk);
}

View File

@ -0,0 +1,97 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Thu, 20 Oct 2016 11:15:22 +0200
Subject: [PATCH] drivers/zram: Don't disable preemption in
zcomp_stream_get/put()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
In v4.7, the driver switched to percpu compression streams, disabling
preemption via get/put_cpu_ptr(). Use a per-zcomp_strm lock here. We
also have to fix an lock order issue in zram_decompress_page() such
that zs_map_object() nests inside of zcomp_stream_put() as it does in
zram_bvec_write().
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
[bigeasy: get_locked_var() -> per zcomp_strm lock]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/block/zram/zcomp.c | 12 ++++++++++--
drivers/block/zram/zcomp.h | 1 +
drivers/block/zram/zram_drv.c | 5 +++--
3 files changed, 14 insertions(+), 4 deletions(-)
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -116,12 +116,19 @@ ssize_t zcomp_available_show(const char
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
{
- return *get_cpu_ptr(comp->stream);
+ struct zcomp_strm *zstrm;
+
+ zstrm = *this_cpu_ptr(comp->stream);
+ spin_lock(&zstrm->zcomp_lock);
+ return zstrm;
}
void zcomp_stream_put(struct zcomp *comp)
{
- put_cpu_ptr(comp->stream);
+ struct zcomp_strm *zstrm;
+
+ zstrm = *this_cpu_ptr(comp->stream);
+ spin_unlock(&zstrm->zcomp_lock);
}
int zcomp_compress(struct zcomp_strm *zstrm,
@@ -171,6 +178,7 @@ int zcomp_cpu_up_prepare(unsigned int cp
pr_err("Can't allocate a compression stream\n");
return -ENOMEM;
}
+ spin_lock_init(&zstrm->zcomp_lock);
*per_cpu_ptr(comp->stream, cpu) = zstrm;
return 0;
}
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -14,6 +14,7 @@ struct zcomp_strm {
/* compression/decompression buffer */
void *buffer;
struct crypto_comp *tfm;
+ spinlock_t zcomp_lock;
};
/* dynamic per-device compression frontend */
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -996,6 +996,7 @@ static int __zram_bvec_read(struct zram
unsigned long handle;
unsigned int size;
void *src, *dst;
+ struct zcomp_strm *zstrm;
if (zram_wb_enabled(zram)) {
zram_slot_lock(zram, index);
@@ -1030,6 +1031,7 @@ static int __zram_bvec_read(struct zram
size = zram_get_obj_size(zram, index);
+ zstrm = zcomp_stream_get(zram->comp);
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
dst = kmap_atomic(page);
@@ -1037,14 +1039,13 @@ static int __zram_bvec_read(struct zram
kunmap_atomic(dst);
ret = 0;
} else {
- struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
dst = kmap_atomic(page);
ret = zcomp_decompress(zstrm, src, size, dst);
kunmap_atomic(dst);
- zcomp_stream_put(zram->comp);
}
zs_unmap_object(zram->mem_pool, handle);
+ zcomp_stream_put(zram->comp);
zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */

View File

@ -0,0 +1,38 @@
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 23 Aug 2017 11:57:29 +0200
Subject: [PATCH] drivers/zram: fix zcomp_stream_get() smp_processor_id() use
in preemptible code
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Use get_local_ptr() instead this_cpu_ptr() to avoid a warning regarding
smp_processor_id() in preemptible code.
raw_cpu_ptr() would be fine, too because the per-CPU data structure is
protected with a spin lock so it does not matter much if we take the
other one.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/block/zram/zcomp.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -118,7 +118,7 @@ struct zcomp_strm *zcomp_stream_get(stru
{
struct zcomp_strm *zstrm;
- zstrm = *this_cpu_ptr(comp->stream);
+ zstrm = *get_local_ptr(comp->stream);
spin_lock(&zstrm->zcomp_lock);
return zstrm;
}
@@ -129,6 +129,7 @@ void zcomp_stream_put(struct zcomp *comp
zstrm = *this_cpu_ptr(comp->stream);
spin_unlock(&zstrm->zcomp_lock);
+ put_local_ptr(zstrm);
}
int zcomp_compress(struct zcomp_strm *zstrm,

View File

@ -0,0 +1,113 @@
Subject: drm,i915: Use local_lock/unlock_irq() in intel_pipe_update_start/end()
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sat, 27 Feb 2016 09:01:42 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
[ 8.014039] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:918
[ 8.014041] in_atomic(): 0, irqs_disabled(): 1, pid: 78, name: kworker/u4:4
[ 8.014045] CPU: 1 PID: 78 Comm: kworker/u4:4 Not tainted 4.1.7-rt7 #5
[ 8.014055] Workqueue: events_unbound async_run_entry_fn
[ 8.014059] 0000000000000000 ffff880037153748 ffffffff815f32c9 0000000000000002
[ 8.014063] ffff88013a50e380 ffff880037153768 ffffffff815ef075 ffff8800372c06c8
[ 8.014066] ffff8800372c06c8 ffff880037153778 ffffffff8107c0b3 ffff880037153798
[ 8.014067] Call Trace:
[ 8.014074] [<ffffffff815f32c9>] dump_stack+0x4a/0x61
[ 8.014078] [<ffffffff815ef075>] ___might_sleep.part.93+0xe9/0xee
[ 8.014082] [<ffffffff8107c0b3>] ___might_sleep+0x53/0x80
[ 8.014086] [<ffffffff815f9064>] rt_spin_lock+0x24/0x50
[ 8.014090] [<ffffffff8109368b>] prepare_to_wait+0x2b/0xa0
[ 8.014152] [<ffffffffa016c04c>] intel_pipe_update_start+0x17c/0x300 [i915]
[ 8.014156] [<ffffffff81093b40>] ? prepare_to_wait_event+0x120/0x120
[ 8.014201] [<ffffffffa0158f36>] intel_begin_crtc_commit+0x166/0x1e0 [i915]
[ 8.014215] [<ffffffffa00c806d>] drm_atomic_helper_commit_planes+0x5d/0x1a0 [drm_kms_helper]
[ 8.014260] [<ffffffffa0171e9b>] intel_atomic_commit+0xab/0xf0 [i915]
[ 8.014288] [<ffffffffa00654c7>] drm_atomic_commit+0x37/0x60 [drm]
[ 8.014298] [<ffffffffa00c6fcd>] drm_atomic_helper_plane_set_property+0x8d/0xd0 [drm_kms_helper]
[ 8.014301] [<ffffffff815f77d9>] ? __ww_mutex_lock+0x39/0x40
[ 8.014319] [<ffffffffa0053b3d>] drm_mode_plane_set_obj_prop+0x2d/0x90 [drm]
[ 8.014328] [<ffffffffa00c8edb>] restore_fbdev_mode+0x6b/0xf0 [drm_kms_helper]
[ 8.014337] [<ffffffffa00cae49>] drm_fb_helper_restore_fbdev_mode_unlocked+0x29/0x80 [drm_kms_helper]
[ 8.014346] [<ffffffffa00caec2>] drm_fb_helper_set_par+0x22/0x50 [drm_kms_helper]
[ 8.014390] [<ffffffffa016dfba>] intel_fbdev_set_par+0x1a/0x60 [i915]
[ 8.014394] [<ffffffff81327dc4>] fbcon_init+0x4f4/0x580
[ 8.014398] [<ffffffff8139ef4c>] visual_init+0xbc/0x120
[ 8.014401] [<ffffffff813a1623>] do_bind_con_driver+0x163/0x330
[ 8.014405] [<ffffffff813a1b2c>] do_take_over_console+0x11c/0x1c0
[ 8.014408] [<ffffffff813236e3>] do_fbcon_takeover+0x63/0xd0
[ 8.014410] [<ffffffff81328965>] fbcon_event_notify+0x785/0x8d0
[ 8.014413] [<ffffffff8107c12d>] ? __might_sleep+0x4d/0x90
[ 8.014416] [<ffffffff810775fe>] notifier_call_chain+0x4e/0x80
[ 8.014419] [<ffffffff810779cd>] __blocking_notifier_call_chain+0x4d/0x70
[ 8.014422] [<ffffffff81077a06>] blocking_notifier_call_chain+0x16/0x20
[ 8.014425] [<ffffffff8132b48b>] fb_notifier_call_chain+0x1b/0x20
[ 8.014428] [<ffffffff8132d8fa>] register_framebuffer+0x21a/0x350
[ 8.014439] [<ffffffffa00cb164>] drm_fb_helper_initial_config+0x274/0x3e0 [drm_kms_helper]
[ 8.014483] [<ffffffffa016f1cb>] intel_fbdev_initial_config+0x1b/0x20 [i915]
[ 8.014486] [<ffffffff8107912c>] async_run_entry_fn+0x4c/0x160
[ 8.014490] [<ffffffff81070ffa>] process_one_work+0x14a/0x470
[ 8.014493] [<ffffffff81071489>] worker_thread+0x169/0x4c0
[ 8.014496] [<ffffffff81071320>] ? process_one_work+0x470/0x470
[ 8.014499] [<ffffffff81076606>] kthread+0xc6/0xe0
[ 8.014502] [<ffffffff81070000>] ? queue_work_on+0x80/0x110
[ 8.014506] [<ffffffff81076540>] ? kthread_worker_fn+0x1c0/0x1c0
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/gpu/drm/i915/intel_sprite.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -36,6 +36,7 @@
#include <drm/drm_rect.h>
#include <drm/drm_atomic.h>
#include <drm/drm_plane_helper.h>
+#include <linux/locallock.h>
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include <drm/i915_drm.h>
@@ -74,6 +75,8 @@ int intel_usecs_to_scanlines(const struc
#define VBLANK_EVASION_TIME_US 100
#endif
+static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
+
/**
* intel_pipe_update_start() - start update of a set of display registers
* @new_crtc_state: the new crtc state
@@ -107,7 +110,7 @@ void intel_pipe_update_start(const struc
VBLANK_EVASION_TIME_US);
max = vblank_start - 1;
- local_irq_disable();
+ local_lock_irq(pipe_update_lock);
if (min <= 0 || max <= 0)
return;
@@ -137,11 +140,11 @@ void intel_pipe_update_start(const struc
break;
}
- local_irq_enable();
+ local_unlock_irq(pipe_update_lock);
timeout = schedule_timeout(timeout);
- local_irq_disable();
+ local_lock_irq(pipe_update_lock);
}
finish_wait(wq, &wait);
@@ -206,7 +209,7 @@ void intel_pipe_update_end(struct intel_
new_crtc_state->base.event = NULL;
}
- local_irq_enable();
+ local_unlock_irq(pipe_update_lock);
if (intel_vgpu_active(dev_priv))
return;

View File

@ -0,0 +1,52 @@
Subject: drm,radeon,i915: Use preempt_disable/enable_rt() where recommended
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sat, 27 Feb 2016 08:09:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
DRM folks identified the spots, so use them.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/gpu/drm/i915/i915_irq.c | 2 ++
drivers/gpu/drm/radeon/radeon_display.c | 2 ++
2 files changed, 4 insertions(+)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1009,6 +1009,7 @@ static bool i915_get_crtc_scanoutpos(str
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+ preempt_disable_rt();
/* Get optional system timestamp before query. */
if (stime)
@@ -1060,6 +1061,7 @@ static bool i915_get_crtc_scanoutpos(str
*etime = ktime_get();
/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+ preempt_enable_rt();
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1813,6 +1813,7 @@ int radeon_get_crtc_scanoutpos(struct dr
struct radeon_device *rdev = dev->dev_private;
/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+ preempt_disable_rt();
/* Get optional system timestamp before query. */
if (stime)
@@ -1905,6 +1906,7 @@ int radeon_get_crtc_scanoutpos(struct dr
*etime = ktime_get();
/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+ preempt_enable_rt();
/* Decode into vertical and horizontal scanout position. */
*vpos = position & 0x1fff;

View File

@ -0,0 +1,27 @@
From 667af2f3d8ccf947fe7c9dac0b59b175963163ba Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 26 Jul 2018 15:06:10 +0200
Subject: [PATCH] efi: Allow efi=runtime
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
In case the option "efi=noruntime" is default at built-time, the user
could overwrite its sate by `efi=runtime' and allow it again.
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/firmware/efi/efi.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -110,6 +110,9 @@ static int __init parse_efi_cmdline(char
if (parse_option_str(str, "noruntime"))
disable_runtime = true;
+ if (parse_option_str(str, "runtime"))
+ disable_runtime = false;
+
return 0;
}
early_param("efi", parse_efi_cmdline);

View File

@ -0,0 +1,40 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 26 Jul 2018 15:03:16 +0200
Subject: [PATCH] efi: Disable runtime services on RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Based on meassurements the EFI functions get_variable /
get_next_variable take up to 2us which looks okay.
The functions get_time, set_time take around 10ms. Those 10ms are too
much. Even one ms would be too much.
Ard mentioned that SetVariable might even trigger larger latencies if
the firware will erase flash blocks on NOR.
The time-functions are used by efi-rtc and can be triggered during
runtimed (either via explicit read/write or ntp sync).
The variable write could be used by pstore.
These functions can be disabled without much of a loss. The poweroff /
reboot hooks may be provided by PSCI.
Disable EFI's runtime wrappers.
This was observed on "EFI v2.60 by SoftIron Overdrive 1000".
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/firmware/efi/efi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -84,7 +84,7 @@ struct mm_struct efi_mm = {
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
};
-static bool disable_runtime;
+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT_BASE);
static int __init setup_noefi(char *arg)
{
disable_runtime = true;

View File

@ -0,0 +1,31 @@
Subject: fs/epoll: Do not disable preemption on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 08 Jul 2011 16:35:35 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
ep_call_nested() takes a sleeping lock so we can't disable preemption.
The light version is enough since ep_call_nested() doesn't mind beeing
invoked twice on the same CPU.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/eventpoll.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -563,12 +563,12 @@ static int ep_poll_wakeup_proc(void *pri
static void ep_poll_safewake(wait_queue_head_t *wq)
{
- int this_cpu = get_cpu();
+ int this_cpu = get_cpu_light();
ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
- put_cpu();
+ put_cpu_light();
}
#else

View File

@ -0,0 +1,83 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 16 Feb 2015 18:49:10 +0100
Subject: fs/aio: simple simple work
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
|in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
|2 locks held by rcuos/2/26:
| #0: (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
| #1: (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
|Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
|Call Trace:
| [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
| [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
| [<ffffffff81589304>] rt_spin_lock+0x24/0x70
| [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
| [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
| [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
| [<ffffffff8106e046>] kthread+0xd6/0xf0
| [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0
replace this preempt_disable() friendly swork.
Reported-By: Mike Galbraith <umgwanakikbuti@gmail.com>
Suggested-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/aio.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,6 +40,7 @@
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
#include <linux/mount.h>
+#include <linux/swork.h>
#include <asm/kmap_types.h>
#include <linux/uaccess.h>
@@ -118,6 +119,7 @@ struct kioctx {
long nr_pages;
struct rcu_work free_rwork; /* see free_ioctx() */
+ struct swork_event free_swork; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -256,6 +258,7 @@ static int __init aio_setup(void)
.mount = aio_mount,
.kill_sb = kill_anon_super,
};
+ BUG_ON(swork_get());
aio_mnt = kern_mount(&aio_fs);
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
@@ -597,9 +600,9 @@ static void free_ioctx_reqs(struct percp
* and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
* now it's safe to cancel any that need to be.
*/
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx_users_work(struct swork_event *sev)
{
- struct kioctx *ctx = container_of(ref, struct kioctx, users);
+ struct kioctx *ctx = container_of(sev, struct kioctx, free_swork);
struct aio_kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
@@ -617,6 +620,14 @@ static void free_ioctx_users(struct perc
percpu_ref_put(&ctx->reqs);
}
+static void free_ioctx_users(struct percpu_ref *ref)
+{
+ struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+ INIT_SWORK(&ctx->free_swork, free_ioctx_users_work);
+ swork_queue(&ctx->free_swork);
+}
+
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
{
unsigned i, new_nr;

View File

@ -0,0 +1,54 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 13 Sep 2017 12:32:34 +0200
Subject: [PATCH] fs/dcache: bring back explicit INIT_HLIST_BL_HEAD init
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Commit 3d375d78593c ("mm: update callers to use HASH_ZERO flag") removed
INIT_HLIST_BL_HEAD and uses the ZERO flag instead for the init. However
on RT we have also a spinlock which needs an init call so we can't use
that.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 11 +++++++++++
1 file changed, 11 insertions(+)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3116,6 +3116,8 @@ static int __init set_dhash_entries(char
static void __init dcache_init_early(void)
{
+ unsigned int loop;
+
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
@@ -3132,11 +3134,16 @@ static void __init dcache_init_early(voi
NULL,
0,
0);
+
+ for (loop = 0; loop < (1U << d_hash_shift); loop++)
+ INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
+
d_hash_shift = 32 - d_hash_shift;
}
static void __init dcache_init(void)
{
+ unsigned int loop;
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
@@ -3160,6 +3167,10 @@ static void __init dcache_init(void)
NULL,
0,
0);
+
+ for (loop = 0; loop < (1U << d_hash_shift); loop++)
+ INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
+
d_hash_shift = 32 - d_hash_shift;
}

View File

@ -0,0 +1,119 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Oct 2017 11:29:53 +0200
Subject: [PATCH] fs/dcache: disable preemption on i_dir_seq's write side
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
i_dir_seq is an opencoded seqcounter. Based on the code it looks like we
could have two writers in parallel despite the fact that the d_lock is
held. The problem is that during the write process on RT the preemption
is still enabled and if this process is interrupted by a reader with RT
priority then we lock up.
To avoid that lock up I am disabling the preemption during the update.
The rename of i_dir_seq is here to ensure to catch new write sides in
future.
Cc: stable-rt@vger.kernel.org
Reported-by: Oleg.Karfich@wago.com
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 12 +++++++-----
fs/inode.c | 2 +-
fs/libfs.c | 6 ++++--
include/linux/fs.h | 2 +-
4 files changed, 13 insertions(+), 9 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2431,9 +2431,10 @@ EXPORT_SYMBOL(d_rehash);
static inline unsigned start_dir_add(struct inode *dir)
{
+ preempt_disable_rt();
for (;;) {
- unsigned n = dir->i_dir_seq;
- if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
+ unsigned n = dir->__i_dir_seq;
+ if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n)
return n;
cpu_relax();
}
@@ -2441,7 +2442,8 @@ static inline unsigned start_dir_add(str
static inline void end_dir_add(struct inode *dir, unsigned n)
{
- smp_store_release(&dir->i_dir_seq, n + 2);
+ smp_store_release(&dir->__i_dir_seq, n + 2);
+ preempt_enable_rt();
}
static void d_wait_lookup(struct dentry *dentry)
@@ -2474,7 +2476,7 @@ struct dentry *d_alloc_parallel(struct d
retry:
rcu_read_lock();
- seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
+ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq);
r_seq = read_seqbegin(&rename_lock);
dentry = __d_lookup_rcu(parent, name, &d_seq);
if (unlikely(dentry)) {
@@ -2502,7 +2504,7 @@ struct dentry *d_alloc_parallel(struct d
}
hlist_bl_lock(b);
- if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
+ if (unlikely(READ_ONCE(parent->d_inode->__i_dir_seq) != seq)) {
hlist_bl_unlock(b);
rcu_read_unlock();
goto retry;
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -155,7 +155,7 @@ int inode_init_always(struct super_block
inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_link = NULL;
- inode->i_dir_seq = 0;
+ inode->__i_dir_seq = 0;
inode->i_rdev = 0;
inode->dirtied_when = 0;
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -90,7 +90,7 @@ static struct dentry *next_positive(stru
struct list_head *from,
int count)
{
- unsigned *seq = &parent->d_inode->i_dir_seq, n;
+ unsigned *seq = &parent->d_inode->__i_dir_seq, n;
struct dentry *res;
struct list_head *p;
bool skipped;
@@ -123,8 +123,9 @@ static struct dentry *next_positive(stru
static void move_cursor(struct dentry *cursor, struct list_head *after)
{
struct dentry *parent = cursor->d_parent;
- unsigned n, *seq = &parent->d_inode->i_dir_seq;
+ unsigned n, *seq = &parent->d_inode->__i_dir_seq;
spin_lock(&parent->d_lock);
+ preempt_disable_rt();
for (;;) {
n = *seq;
if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
@@ -137,6 +138,7 @@ static void move_cursor(struct dentry *c
else
list_add_tail(&cursor->d_child, &parent->d_subdirs);
smp_store_release(seq, n + 2);
+ preempt_enable_rt();
spin_unlock(&parent->d_lock);
}
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -657,7 +657,7 @@ struct inode {
struct block_device *i_bdev;
struct cdev *i_cdev;
char *i_link;
- unsigned i_dir_seq;
+ unsigned __i_dir_seq;
};
__u32 i_generation;

View File

@ -0,0 +1,59 @@
Subject: fs: dcache: Use cpu_chill() in trylock loops
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 07 Mar 2012 21:00:34 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Retry loops on RT might loop forever when the modifying side was
preempted. Use cpu_chill() instead of cpu_relax() to let the system
make progress.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/autofs/expire.c | 3 ++-
fs/namespace.c | 8 ++++++--
2 files changed, 8 insertions(+), 3 deletions(-)
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -8,6 +8,7 @@
* option, any later version, incorporated herein by reference.
*/
+#include <linux/delay.h>
#include "autofs_i.h"
static unsigned long now;
@@ -148,7 +149,7 @@ static struct dentry *get_next_positive_
parent = p->d_parent;
if (!spin_trylock(&parent->d_lock)) {
spin_unlock(&p->d_lock);
- cpu_relax();
+ cpu_chill();
goto relock;
}
spin_unlock(&p->d_lock);
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,6 +14,7 @@
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
+#include <linux/delay.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
@@ -353,8 +354,11 @@ int __mnt_want_write(struct vfsmount *m)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
- cpu_relax();
+ while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ preempt_enable();
+ cpu_chill();
+ preempt_disable();
+ }
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
* be set to match its requirements. So we must not load that until

View File

@ -0,0 +1,215 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 14 Sep 2016 14:35:49 +0200
Subject: [PATCH] fs/dcache: use swait_queue instead of waitqueue
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
__d_lookup_done() invokes wake_up_all() while holding a hlist_bl_lock()
which disables preemption. As a workaround convert it to swait.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/cifs/readdir.c | 2 +-
fs/dcache.c | 27 +++++++++++++++------------
fs/fuse/dir.c | 2 +-
fs/namei.c | 4 ++--
fs/nfs/dir.c | 4 ++--
fs/nfs/unlink.c | 4 ++--
fs/proc/base.c | 2 +-
fs/proc/proc_sysctl.c | 2 +-
include/linux/dcache.h | 4 ++--
include/linux/nfs_xdr.h | 2 +-
kernel/sched/swait.c | 1 +
11 files changed, 29 insertions(+), 25 deletions(-)
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent,
struct inode *inode;
struct super_block *sb = parent->d_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2448,21 +2448,24 @@ static inline void end_dir_add(struct in
static void d_wait_lookup(struct dentry *dentry)
{
- if (d_in_lookup(dentry)) {
- DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(dentry->d_wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&dentry->d_lock);
- schedule();
- spin_lock(&dentry->d_lock);
- } while (d_in_lookup(dentry));
- }
+ struct swait_queue __wait;
+
+ if (!d_in_lookup(dentry))
+ return;
+
+ INIT_LIST_HEAD(&__wait.task_list);
+ do {
+ prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&dentry->d_lock);
+ schedule();
+ spin_lock(&dentry->d_lock);
+ } while (d_in_lookup(dentry));
+ finish_swait(dentry->d_wait, &__wait);
}
struct dentry *d_alloc_parallel(struct dentry *parent,
const struct qstr *name,
- wait_queue_head_t *wq)
+ struct swait_queue_head *wq)
{
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2577,7 +2580,7 @@ void __d_lookup_done(struct dentry *dent
hlist_bl_lock(b);
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
- wake_up_all(dentry->d_wait);
+ swake_up_all(dentry->d_wait);
dentry->d_wait = NULL;
hlist_bl_unlock(b);
INIT_HLIST_NODE(&dentry->d_u.d_alias);
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1195,7 +1195,7 @@ static int fuse_direntplus_link(struct f
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1604,7 +1604,7 @@ static struct dentry *__lookup_slow(cons
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
@@ -3121,7 +3121,7 @@ static int lookup_open(struct nameidata
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode)))
return -ENOENT;
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -445,7 +445,7 @@ static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
@@ -1454,7 +1454,7 @@ int nfs_atomic_open(struct inode *dir, s
struct file *file, unsigned open_flags,
umode_t mode, int *opened)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,7 +13,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/swait.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
@@ -206,7 +206,7 @@ nfs_async_unlink(struct dentry *dentry,
goto out_free_name;
}
data->res.dir_attr = &data->dir_attr;
- init_waitqueue_head(&data->wq);
+ init_swait_queue_head(&data->wq);
status = -EBUSY;
spin_lock(&dentry->d_lock);
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1864,7 +1864,7 @@ bool proc_fill_cache(struct file *file,
child = d_hash_and_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
if (IS_ERR(child))
goto end_instantiate;
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -678,7 +678,7 @@ static bool proc_sys_fill_cache(struct f
child = d_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
if (IS_ERR(child))
return false;
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -105,7 +105,7 @@ struct dentry {
union {
struct list_head d_lru; /* LRU list */
- wait_queue_head_t *d_wait; /* in-lookup ones only */
+ struct swait_queue_head *d_wait; /* in-lookup ones only */
};
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
@@ -238,7 +238,7 @@ extern struct dentry * d_alloc(struct de
extern struct dentry * d_alloc_anon(struct super_block *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
- wait_queue_head_t *);
+ struct swait_queue_head *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1536,7 +1536,7 @@ struct nfs_unlinkdata {
struct nfs_removeargs args;
struct nfs_removeres res;
struct dentry *dentry;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
struct rpc_cred *cred;
struct nfs_fattr dir_attr;
long timeout;
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -70,6 +70,7 @@ void swake_up_all(struct swait_queue_hea
struct swait_queue *curr;
LIST_HEAD(tmp);
+ WARN_ON(irqs_disabled());
raw_spin_lock_irq(&q->lock);
list_splice_init(&q->task_list, &tmp);
while (!list_empty(&tmp)) {

View File

@ -0,0 +1,97 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 10:11:25 +0100
Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
bit_spin_locks break under RT.
Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--
include/linux/buffer_head.h | 8 ++++++++
include/linux/jbd2.h | 24 ++++++++++++++++++++++++
2 files changed, 32 insertions(+)
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -78,6 +78,10 @@ struct buffer_head {
atomic_t b_count; /* users using this buffer_head */
#ifdef CONFIG_PREEMPT_RT_BASE
spinlock_t b_uptodate_lock;
+#if IS_ENABLED(CONFIG_JBD2)
+ spinlock_t b_state_lock;
+ spinlock_t b_journal_head_lock;
+#endif
#endif
};
@@ -109,6 +113,10 @@ static inline void buffer_head_init_lock
{
#ifdef CONFIG_PREEMPT_RT_BASE
spin_lock_init(&bh->b_uptodate_lock);
+#if IS_ENABLED(CONFIG_JBD2)
+ spin_lock_init(&bh->b_state_lock);
+ spin_lock_init(&bh->b_journal_head_lock);
+#endif
#endif
}
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh
static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_lock(BH_State, &bh->b_state);
+#else
+ spin_lock(&bh->b_state_lock);
+#endif
}
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
return bit_spin_trylock(BH_State, &bh->b_state);
+#else
+ return spin_trylock(&bh->b_state_lock);
+#endif
}
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
return bit_spin_is_locked(BH_State, &bh->b_state);
+#else
+ return spin_is_locked(&bh->b_state_lock);
+#endif
}
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_unlock(BH_State, &bh->b_state);
+#else
+ spin_unlock(&bh->b_state_lock);
+#endif
}
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_lock(BH_JournalHead, &bh->b_state);
+#else
+ spin_lock(&bh->b_journal_head_lock);
+#endif
}
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_unlock(BH_JournalHead, &bh->b_state);
+#else
+ spin_unlock(&bh->b_journal_head_lock);
+#endif
}
#define J_ASSERT(assert) BUG_ON(!(assert))

View File

@ -0,0 +1,139 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 15 Sep 2016 10:51:27 +0200
Subject: [PATCH] fs/nfs: turn rmdir_sem into a semaphore
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The RW semaphore had a reader side which used the _non_owner version
because it most likely took the reader lock in one thread and released it
in another which would cause lockdep to complain if the "regular"
version was used.
On -RT we need the owner because the rw lock is turned into a rtmutex.
The semaphores on the hand are "plain simple" and should work as
expected. We can't have multiple readers but on -RT we don't allow
multiple readers anyway so that is not a loss.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/nfs/dir.c | 8 ++++++++
fs/nfs/inode.c | 4 ++++
fs/nfs/unlink.c | 31 +++++++++++++++++++++++++++----
include/linux/nfs_fs.h | 4 ++++
4 files changed, 43 insertions(+), 4 deletions(-)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1775,7 +1775,11 @@ int nfs_rmdir(struct inode *dir, struct
trace_nfs_rmdir_enter(dir, dentry);
if (d_really_is_positive(dentry)) {
+#ifdef CONFIG_PREEMPT_RT_BASE
+ down(&NFS_I(d_inode(dentry))->rmdir_sem);
+#else
down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
+#endif
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
/* Ensure the VFS deletes this inode */
switch (error) {
@@ -1785,7 +1789,11 @@ int nfs_rmdir(struct inode *dir, struct
case -ENOENT:
nfs_dentry_handle_enoent(dentry);
}
+#ifdef CONFIG_PREEMPT_RT_BASE
+ up(&NFS_I(d_inode(dentry))->rmdir_sem);
+#else
up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
+#endif
} else
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
trace_nfs_rmdir_exit(dir, dentry, error);
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2103,7 +2103,11 @@ static void init_once(void *foo)
atomic_long_set(&nfsi->nrequests, 0);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
+#ifdef CONFIG_PREEMPT_RT_BASE
+ sema_init(&nfsi->rmdir_sem, 1);
+#else
init_rwsem(&nfsi->rmdir_sem);
+#endif
mutex_init(&nfsi->commit_mutex);
nfs4_init_once(nfsi);
}
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -52,6 +52,29 @@ static void nfs_async_unlink_done(struct
rpc_restart_call_prepare(task);
}
+#ifdef CONFIG_PREEMPT_RT_BASE
+static void nfs_down_anon(struct semaphore *sema)
+{
+ down(sema);
+}
+
+static void nfs_up_anon(struct semaphore *sema)
+{
+ up(sema);
+}
+
+#else
+static void nfs_down_anon(struct rw_semaphore *rwsem)
+{
+ down_read_non_owner(rwsem);
+}
+
+static void nfs_up_anon(struct rw_semaphore *rwsem)
+{
+ up_read_non_owner(rwsem);
+}
+#endif
+
/**
* nfs_async_unlink_release - Release the sillydelete data.
* @task: rpc_task of the sillydelete
@@ -65,7 +88,7 @@ static void nfs_async_unlink_release(voi
struct dentry *dentry = data->dentry;
struct super_block *sb = dentry->d_sb;
- up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
+ nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
d_lookup_done(dentry);
nfs_free_unlinkdata(data);
dput(dentry);
@@ -118,10 +141,10 @@ static int nfs_call_unlink(struct dentry
struct inode *dir = d_inode(dentry->d_parent);
struct dentry *alias;
- down_read_non_owner(&NFS_I(dir)->rmdir_sem);
+ nfs_down_anon(&NFS_I(dir)->rmdir_sem);
alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
if (IS_ERR(alias)) {
- up_read_non_owner(&NFS_I(dir)->rmdir_sem);
+ nfs_up_anon(&NFS_I(dir)->rmdir_sem);
return 0;
}
if (!d_in_lookup(alias)) {
@@ -143,7 +166,7 @@ static int nfs_call_unlink(struct dentry
ret = 0;
spin_unlock(&alias->d_lock);
dput(alias);
- up_read_non_owner(&NFS_I(dir)->rmdir_sem);
+ nfs_up_anon(&NFS_I(dir)->rmdir_sem);
/*
* If we'd displaced old cached devname, free it. At that
* point dentry is definitely not a root, so we won't need
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -163,7 +163,11 @@ struct nfs_inode {
/* Readers: in-flight sillydelete RPC calls */
/* Writers: rmdir */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ struct semaphore rmdir_sem;
+#else
struct rw_semaphore rmdir_sem;
+#endif
struct mutex commit_mutex;
#if IS_ENABLED(CONFIG_NFS_V4)

View File

@ -0,0 +1,208 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 09:18:52 +0100
Subject: buffer_head: Replace bh_uptodate_lock for -rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Wrap the bit_spin_lock calls into a separate inline and add the RT
replacements with a real spinlock.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/buffer.c | 21 +++++++--------------
fs/ext4/page-io.c | 6 ++----
fs/ntfs/aops.c | 10 +++-------
fs/xfs/xfs_aops.c | 6 ++----
include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
5 files changed, 48 insertions(+), 29 deletions(-)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -273,8 +273,7 @@ static void end_buffer_async_read(struct
* decide that the page is now completely done.
*/
first = page_buffers(page);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ flags = bh_uptodate_lock_irqsave(first);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
@@ -287,8 +286,7 @@ static void end_buffer_async_read(struct
}
tmp = tmp->b_this_page;
} while (tmp != bh);
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(first, flags);
/*
* If none of the buffers had errors and they are all
@@ -300,9 +298,7 @@ static void end_buffer_async_read(struct
return;
still_busy:
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
- return;
+ bh_uptodate_unlock_irqrestore(first, flags);
}
/*
@@ -329,8 +325,7 @@ void end_buffer_async_write(struct buffe
}
first = page_buffers(page);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ flags = bh_uptodate_lock_irqsave(first);
clear_buffer_async_write(bh);
unlock_buffer(bh);
@@ -342,15 +337,12 @@ void end_buffer_async_write(struct buffe
}
tmp = tmp->b_this_page;
}
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(first, flags);
end_page_writeback(page);
return;
still_busy:
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
- return;
+ bh_uptodate_unlock_irqrestore(first, flags);
}
EXPORT_SYMBOL(end_buffer_async_write);
@@ -3349,6 +3341,7 @@ struct buffer_head *alloc_buffer_head(gf
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
+ buffer_head_init_locks(ret);
preempt_disable();
__this_cpu_inc(bh_accounting.nr);
recalc_bh_state();
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -95,8 +95,7 @@ static void ext4_finish_bio(struct bio *
* We check all buffers in the page under BH_Uptodate_Lock
* to avoid races with other end io clearing async_write flags
*/
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+ flags = bh_uptodate_lock_irqsave(head);
do {
if (bh_offset(bh) < bio_start ||
bh_offset(bh) + bh->b_size > bio_end) {
@@ -108,8 +107,7 @@ static void ext4_finish_bio(struct bio *
if (bio->bi_status)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(head, flags);
if (!under_io) {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (data_page)
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -106,8 +106,7 @@ static void ntfs_end_buffer_async_read(s
"0x%llx.", (unsigned long long)bh->b_blocknr);
}
first = page_buffers(page);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ flags = bh_uptodate_lock_irqsave(first);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
@@ -122,8 +121,7 @@ static void ntfs_end_buffer_async_read(s
}
tmp = tmp->b_this_page;
} while (tmp != bh);
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(first, flags);
/*
* If none of the buffers had errors then we can set the page uptodate,
* but we first have to perform the post read mst fixups, if the
@@ -156,9 +154,7 @@ static void ntfs_end_buffer_async_read(s
unlock_page(page);
return;
still_busy:
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
- return;
+ bh_uptodate_unlock_irqrestore(first, flags);
}
/**
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -108,8 +108,7 @@ xfs_finish_page_writeback(
ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+ flags = bh_uptodate_lock_irqsave(head);
do {
if (off >= bvec->bv_offset &&
off < bvec->bv_offset + bvec->bv_len) {
@@ -131,8 +130,7 @@ xfs_finish_page_writeback(
}
off += bh->b_size;
} while ((bh = bh->b_this_page) != head);
- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(head, flags);
if (!busy)
end_page_writeback(bvec->bv_page);
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -76,8 +76,42 @@ struct buffer_head {
struct address_space *b_assoc_map; /* mapping this buffer is
associated with */
atomic_t b_count; /* users using this buffer_head */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spinlock_t b_uptodate_lock;
+#endif
};
+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
+{
+ unsigned long flags;
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
+#else
+ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
+#endif
+ return flags;
+}
+
+static inline void
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
+{
+#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
+ local_irq_restore(flags);
+#else
+ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
+#endif
+}
+
+static inline void buffer_head_init_locks(struct buffer_head *bh)
+{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spin_lock_init(&bh->b_uptodate_lock);
+#endif
+}
+
/*
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
* and buffer_foo() functions.

View File

@ -0,0 +1,55 @@
From 0277aff2b41aef9e2a6bd716ee0076036881334d Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Tue, 3 Jul 2018 13:34:30 -0500
Subject: [PATCH] fscache: initialize cookie hash table raw spinlocks
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
The fscache cookie mechanism uses a hash table of hlist_bl_head structures. The
PREEMPT_RT patcheset adds a raw spinlock to this structure and so on PREEMPT_RT
the structures get used uninitialized, causing warnings about bad magic numbers
when spinlock debugging is turned on.
Use the init function for fscache cookies.
Signed-off-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/fscache/cookie.c | 8 ++++++++
fs/fscache/main.c | 1 +
include/linux/fscache.h | 1 +
3 files changed, 10 insertions(+)
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -973,3 +973,11 @@ int __fscache_check_consistency(struct f
return -ESTALE;
}
EXPORT_SYMBOL(__fscache_check_consistency);
+
+void __init fscache_cookie_init(void)
+{
+ int i;
+
+ for (i = 0; i < (1 << fscache_cookie_hash_shift) - 1; i++)
+ INIT_HLIST_BL_HEAD(&fscache_cookie_hash[i]);
+}
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -151,6 +151,7 @@ static int __init fscache_init(void)
ret = -ENOMEM;
goto error_cookie_jar;
}
+ fscache_cookie_init();
fscache_root = kobject_create_and_add("fscache", kernel_kobj);
if (!fscache_root)
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -230,6 +230,7 @@ extern void __fscache_readpages_cancel(s
extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
bool (*)(void *), void *);
+extern void fscache_cookie_init(void);
/**
* fscache_register_netfs - Register a filesystem as desiring caching services

View File

@ -0,0 +1,46 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Sun, 16 Oct 2016 05:08:30 +0200
Subject: [PATCH] ftrace: Fix trace header alignment
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Line up helper arrows to the right column.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
[bigeasy: fixup function tracer header]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3347,17 +3347,17 @@ get_total_entries(struct trace_buffer *b
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _--------=> CPU# \n"
- "# / _-------=> irqs-off \n"
- "# | / _------=> need-resched \n"
- "# || / _-----=> need-resched_lazy \n"
- "# ||| / _----=> hardirq/softirq \n"
- "# |||| / _---=> preempt-depth \n"
- "# ||||| / _--=> preempt-lazy-depth\n"
- "# |||||| / _-=> migrate-disable \n"
- "# ||||||| / delay \n"
- "# cmd pid |||||||| time | caller \n"
- "# \\ / |||||||| \\ | / \n");
+ seq_puts(m, "# _--------=> CPU# \n"
+ "# / _-------=> irqs-off \n"
+ "# | / _------=> need-resched \n"
+ "# || / _-----=> need-resched_lazy \n"
+ "# ||| / _----=> hardirq/softirq \n"
+ "# |||| / _---=> preempt-depth \n"
+ "# ||||| / _--=> preempt-lazy-depth\n"
+ "# |||||| / _-=> migrate-disable \n"
+ "# ||||||| / delay \n"
+ "# cmd pid |||||||| time | caller \n"
+ "# \\ / |||||||| \\ | / \n");
}
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)

View File

@ -0,0 +1,74 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:56:42 +0200
Subject: trace: Add migrate-disabled counter to tracing output
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/trace_events.h | 2 ++
kernel/trace/trace.c | 9 ++++++---
kernel/trace/trace_events.c | 2 ++
kernel/trace/trace_output.c | 5 +++++
4 files changed, 15 insertions(+), 3 deletions(-)
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -62,6 +62,8 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
+ unsigned short migrate_disable;
+ unsigned short padding;
};
#define TRACE_EVENT_TYPE_MAX \
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2145,6 +2145,8 @@ tracing_generic_entry_update(struct trac
((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
+
+ entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
@@ -3348,9 +3350,10 @@ static void print_lat_help_header(struct
"# | / _----=> need-resched \n"
"# || / _---=> hardirq/softirq \n"
"# ||| / _--=> preempt-depth \n"
- "# |||| / delay \n"
- "# cmd pid ||||| time | caller \n"
- "# \\ / ||||| \\ | / \n");
+ "# |||| / _--=> migrate-disable\n"
+ "# ||||| / delay \n"
+ "# cmd pid |||||| time | caller \n"
+ "# \\ / ||||| \\ | / \n");
}
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -187,6 +187,8 @@ static int trace_define_common_fields(vo
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
+ __common_field(unsigned short, migrate_disable);
+ __common_field(unsigned short, padding);
return ret;
}
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -493,6 +493,11 @@ int trace_print_lat_fmt(struct trace_seq
else
trace_seq_putc(s, '.');
+ if (entry->migrate_disable)
+ trace_seq_printf(s, "%x", entry->migrate_disable);
+ else
+ trace_seq_putc(s, '.');
+
return !trace_seq_has_overflowed(s);
}

View File

@ -0,0 +1,43 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 Mar 2013 11:17:42 +0100
Subject: futex: Ensure lock/unlock symetry versus pi_lock and hash bucket lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
In exit_pi_state_list() we have the following locking construct:
spin_lock(&hb->lock);
raw_spin_lock_irq(&curr->pi_lock);
...
spin_unlock(&hb->lock);
In !RT this works, but on RT the migrate_enable() function which is
called from spin_unlock() sees atomic context due to the held pi_lock
and just decrements the migrate_disable_atomic counter of the
task. Now the next call to migrate_disable() sees the counter being
negative and issues a warning. That check should be in
migrate_enable() already.
Fix this by dropping pi_lock before unlocking hb->lock and reaquire
pi_lock after that again. This is safe as the loop code reevaluates
head again under the pi_lock.
Reported-by: Yong Zhang <yong.zhang@windriver.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/futex.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -918,7 +918,9 @@ void exit_pi_state_list(struct task_stru
if (head->next != next) {
/* retain curr->pi_lock for the loop invariant */
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irq(&curr->pi_lock);
spin_unlock(&hb->lock);
+ raw_spin_lock_irq(&curr->pi_lock);
put_pi_state(pi_state);
continue;
}

View File

@ -0,0 +1,113 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: futex: Fix bug on when a requeued RT task times out
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Requeue with timeout causes a bug with PREEMPT_RT_FULL.
The bug comes from a timed out condition.
TASK 1 TASK 2
------ ------
futex_wait_requeue_pi()
futex_wait_queue_me()
<timed out>
double_lock_hb();
raw_spin_lock(pi_lock);
if (current->pi_blocked_on) {
} else {
current->pi_blocked_on = PI_WAKE_INPROGRESS;
run_spin_unlock(pi_lock);
spin_lock(hb->lock); <-- blocked!
plist_for_each_entry_safe(this) {
rt_mutex_start_proxy_lock();
task_blocks_on_rt_mutex();
BUG_ON(task->pi_blocked_on)!!!!
The BUG_ON() actually has a check for PI_WAKE_INPROGRESS, but the
problem is that, after TASK 1 sets PI_WAKE_INPROGRESS, it then tries to
grab the hb->lock, which it fails to do so. As the hb->lock is a mutex,
it will block and set the "pi_blocked_on" to the hb->lock.
When TASK 2 goes to requeue it, the check for PI_WAKE_INPROGESS fails
because the task1's pi_blocked_on is no longer set to that, but instead,
set to the hb->lock.
The fix:
When calling rt_mutex_start_proxy_lock() a check is made to see
if the proxy tasks pi_blocked_on is set. If so, exit out early.
Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies
the proxy task that it is being requeued, and will handle things
appropriately.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/locking/rtmutex.c | 31 ++++++++++++++++++++++++++++++-
kernel/locking/rtmutex_common.h | 1 +
2 files changed, 31 insertions(+), 1 deletion(-)
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -137,7 +137,8 @@ static void fixup_rt_mutex_waiters(struc
static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
{
- return waiter && waiter != PI_WAKEUP_INPROGRESS;
+ return waiter && waiter != PI_WAKEUP_INPROGRESS &&
+ waiter != PI_REQUEUE_INPROGRESS;
}
/*
@@ -1763,6 +1764,34 @@ int __rt_mutex_start_proxy_lock(struct r
if (try_to_take_rt_mutex(lock, task, NULL))
return 1;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ /*
+ * In PREEMPT_RT there's an added race.
+ * If the task, that we are about to requeue, times out,
+ * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
+ * to skip this task. But right after the task sets
+ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
+ * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
+ * This will replace the PI_WAKEUP_INPROGRESS with the actual
+ * lock that it blocks on. We *must not* place this task
+ * on this proxy lock in that case.
+ *
+ * To prevent this race, we first take the task's pi_lock
+ * and check if it has updated its pi_blocked_on. If it has,
+ * we assume that it woke up and we return -EAGAIN.
+ * Otherwise, we set the task's pi_blocked_on to
+ * PI_REQUEUE_INPROGRESS, so that if the task is waking up
+ * it will know that we are in the process of requeuing it.
+ */
+ raw_spin_lock(&task->pi_lock);
+ if (task->pi_blocked_on) {
+ raw_spin_unlock(&task->pi_lock);
+ return -EAGAIN;
+ }
+ task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
+ raw_spin_unlock(&task->pi_lock);
+#endif
+
/* We enforce deadlock detection for futexes */
ret = task_blocks_on_rt_mutex(lock, waiter, task,
RT_MUTEX_FULL_CHAINWALK);
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -131,6 +131,7 @@ enum rtmutex_chainwalk {
* PI-futex support (proxy locking functions, etc.):
*/
#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
+#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,

View File

@ -0,0 +1,59 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Mar 2017 14:23:35 +0100
Subject: [PATCH] futex: workaround migrate_disable/enable in different context
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
migrate_disable()/migrate_enable() takes a different path in atomic() vs
!atomic() context. These little hacks ensure that we don't underflow / overflow
the migrate code counts properly while we lock the hb lockwith interrupts
enabled and unlock it with interrupts disabled.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/futex.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2798,9 +2798,18 @@ static int futex_lock_pi(u32 __user *uad
* lock handoff sequence.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+ /*
+ * the migrate_disable() here disables migration in the in_atomic() fast
+ * path which is enabled again in the following spin_unlock(). We have
+ * one migrate_disable() pending in the slow-path which is reversed
+ * after the raw_spin_unlock_irq() where we leave the atomic context.
+ */
+ migrate_disable();
+
spin_unlock(q.lock_ptr);
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+ migrate_enable();
if (ret) {
if (ret == 1)
@@ -2947,11 +2956,21 @@ static int futex_unlock_pi(u32 __user *u
* observed.
*/
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ /*
+ * Magic trickery for now to make the RT migrate disable
+ * logic happy. The following spin_unlock() happens with
+ * interrupts disabled so the internal migrate_enable()
+ * won't undo the migrate_disable() which was issued when
+ * locking hb->lock.
+ */
+ migrate_disable();
spin_unlock(&hb->lock);
/* drops pi_state->pi_mutex.wait_lock */
ret = wake_futex_pi(uaddr, uval, pi_state);
+ migrate_enable();
+
put_pi_state(pi_state);
/*

View File

@ -0,0 +1,38 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:57 -0500
Subject: genirq: Disable irqpoll on -rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Creates long latencies for no value
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/irq/spurious.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir
static int __init irqfixup_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
+ return 1;
+#endif
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
@@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644);
static int __init irqpoll_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
+ return 1;
+#endif
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");

View File

@ -0,0 +1,122 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 21 Aug 2013 17:48:46 +0200
Subject: genirq: Do not invoke the affinity callback via a workqueue on RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Joe Korty reported, that __irq_set_affinity_locked() schedules a
workqueue while holding a rawlock which results in a might_sleep()
warning.
This patch uses swork_queue() instead.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/interrupt.h | 6 ++++++
kernel/irq/manage.c | 43 ++++++++++++++++++++++++++++++++++++++++---
2 files changed, 46 insertions(+), 3 deletions(-)
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -13,6 +13,7 @@
#include <linux/hrtimer.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
+#include <linux/swork.h>
#include <linux/atomic.h>
#include <asm/ptrace.h>
@@ -225,6 +226,7 @@ extern void resume_device_irqs(void);
* struct irq_affinity_notify - context for notification of IRQ affinity changes
* @irq: Interrupt to which notification applies
* @kref: Reference count, for internal use
+ * @swork: Swork item, for internal use
* @work: Work item, for internal use
* @notify: Function to be called on change. This will be
* called in process context.
@@ -236,7 +238,11 @@ extern void resume_device_irqs(void);
struct irq_affinity_notify {
unsigned int irq;
struct kref kref;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ struct swork_event swork;
+#else
struct work_struct work;
+#endif
void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
void (*release)(struct kref *ref);
};
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -259,7 +259,12 @@ int irq_set_affinity_locked(struct irq_d
if (desc->affinity_notify) {
kref_get(&desc->affinity_notify->kref);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+ swork_queue(&desc->affinity_notify->swork);
+#else
schedule_work(&desc->affinity_notify->work);
+#endif
}
irqd_set(data, IRQD_AFFINITY_SET);
@@ -297,10 +302,8 @@ int irq_set_affinity_hint(unsigned int i
}
EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
-static void irq_affinity_notify(struct work_struct *work)
+static void _irq_affinity_notify(struct irq_affinity_notify *notify)
{
- struct irq_affinity_notify *notify =
- container_of(work, struct irq_affinity_notify, work);
struct irq_desc *desc = irq_to_desc(notify->irq);
cpumask_var_t cpumask;
unsigned long flags;
@@ -322,6 +325,35 @@ static void irq_affinity_notify(struct w
kref_put(&notify->kref, notify->release);
}
+#ifdef CONFIG_PREEMPT_RT_BASE
+static void init_helper_thread(void)
+{
+ static int init_sworker_once;
+
+ if (init_sworker_once)
+ return;
+ if (WARN_ON(swork_get()))
+ return;
+ init_sworker_once = 1;
+}
+
+static void irq_affinity_notify(struct swork_event *swork)
+{
+ struct irq_affinity_notify *notify =
+ container_of(swork, struct irq_affinity_notify, swork);
+ _irq_affinity_notify(notify);
+}
+
+#else
+
+static void irq_affinity_notify(struct work_struct *work)
+{
+ struct irq_affinity_notify *notify =
+ container_of(work, struct irq_affinity_notify, work);
+ _irq_affinity_notify(notify);
+}
+#endif
+
/**
* irq_set_affinity_notifier - control notification of IRQ affinity changes
* @irq: Interrupt for which to enable/disable notification
@@ -350,7 +382,12 @@ irq_set_affinity_notifier(unsigned int i
if (notify) {
notify->irq = irq;
kref_init(&notify->kref);
+#ifdef CONFIG_PREEMPT_RT_BASE
+ INIT_SWORK(&notify->swork, irq_affinity_notify);
+ init_helper_thread();
+#else
INIT_WORK(&notify->work, irq_affinity_notify);
+#endif
}
raw_spin_lock_irqsave(&desc->lock, flags);

View File

@ -0,0 +1,46 @@
Subject: genirq: Force interrupt thread on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 03 Apr 2011 11:57:29 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Force threaded_irqs and optimize the code (force_irqthreads) in regard
to this.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/interrupt.h | 4 ++++
kernel/irq/manage.c | 2 ++
2 files changed, 6 insertions(+)
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -427,7 +427,11 @@ extern int irq_set_irqchip_state(unsigne
bool state);
#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifdef CONFIG_PREEMPT_RT_BASE
+# define force_irqthreads (true)
+# else
extern bool force_irqthreads;
+# endif
#else
#define force_irqthreads (0)
#endif
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -23,6 +23,7 @@
#include "internals.h"
#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifndef CONFIG_PREEMPT_RT_BASE
__read_mostly bool force_irqthreads;
EXPORT_SYMBOL_GPL(force_irqthreads);
@@ -32,6 +33,7 @@ static int __init setup_forced_irqthread
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
+# endif
#endif
static void __synchronize_hardirq(struct irq_desc *desc)

View File

@ -0,0 +1,26 @@
From: Josh Cartwright <joshc@ni.com>
Date: Thu, 11 Feb 2016 11:54:00 -0600
Subject: genirq: update irq_set_irqchip_state documentation
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
On -rt kernels, the use of migrate_disable()/migrate_enable() is
sufficient to guarantee a task isn't moved to another CPU. Update the
irq_set_irqchip_state() documentation to reflect this.
Signed-off-by: Josh Cartwright <joshc@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/irq/manage.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2261,7 +2261,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state)
* This call sets the internal irqchip state of an interrupt,
* depending on the value of @which.
*
- * This function should be called with preemption disabled if the
+ * This function should be called with migration disabled if the
* interrupt controller has per-cpu registers.
*/
int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,

View File

@ -0,0 +1,96 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2017 18:31:00 +0200
Subject: [PATCH] hotplug: duct-tape RT-rwlock usage for non-RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
This type is only available on -RT. We need to craft something for
non-RT. Since the only migrate_disable() user is -RT only, there is no
damage.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/cpu.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -73,7 +73,7 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_s
.fail = CPUHP_INVALID,
};
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL)
static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \
__RWLOCK_RT_INITIALIZER(cpuhp_pin_lock);
#endif
@@ -292,6 +292,7 @@ static int cpu_hotplug_disabled;
*/
void pin_current_cpu(void)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
struct rt_rw_lock *cpuhp_pin;
unsigned int cpu;
int ret;
@@ -316,6 +317,7 @@ void pin_current_cpu(void)
goto again;
}
current->pinned_on_cpu = cpu;
+#endif
}
/**
@@ -323,6 +325,7 @@ void pin_current_cpu(void)
*/
void unpin_current_cpu(void)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
if (WARN_ON(current->pinned_on_cpu != smp_processor_id()))
@@ -330,6 +333,7 @@ void unpin_current_cpu(void)
current->pinned_on_cpu = -1;
__read_rt_unlock(cpuhp_pin);
+#endif
}
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
@@ -799,7 +803,9 @@ static int take_cpu_down(void *_param)
static int takedown_cpu(unsigned int cpu)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu);
+#endif
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err;
@@ -813,14 +819,18 @@ static int takedown_cpu(unsigned int cpu
*/
irq_lock_sparse();
+#ifdef CONFIG_PREEMPT_RT_FULL
__write_rt_lock(cpuhp_pin);
+#endif
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
+#ifdef CONFIG_PREEMPT_RT_FULL
__write_rt_unlock(cpuhp_pin);
+#endif
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
@@ -839,7 +849,9 @@ static int takedown_cpu(unsigned int cpu
wait_for_ap_thread(st, false);
BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
+#ifdef CONFIG_PREEMPT_RT_FULL
__write_rt_unlock(cpuhp_pin);
+#endif
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();

View File

@ -0,0 +1,91 @@
Subject: hotplug: Lightweight get online cpus
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Jun 2011 12:36:06 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
get_online_cpus() is a heavy weight function which involves a global
mutex. migrate_disable() wants a simpler construct which prevents only
a CPU from going doing while a task is in a migrate disabled section.
Implement a per cpu lockless mechanism, which serializes only in the
real unplug case on a global mutex. That serialization affects only
tasks on the cpu which should be brought down.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/cpu.h | 5 +++++
kernel/cpu.c | 15 +++++++++++++++
kernel/sched/core.c | 4 ++++
3 files changed, 24 insertions(+)
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -108,6 +108,8 @@ extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
void clear_tasks_mm_cpumask(int cpu);
int cpu_down(unsigned int cpu);
+extern void pin_current_cpu(void);
+extern void unpin_current_cpu(void);
#else /* CONFIG_HOTPLUG_CPU */
@@ -118,6 +120,9 @@ static inline void cpus_read_unlock(void
static inline void lockdep_assert_cpus_held(void) { }
static inline void cpu_hotplug_disable(void) { }
static inline void cpu_hotplug_enable(void) { }
+static inline void pin_current_cpu(void) { }
+static inline void unpin_current_cpu(void) { }
+
#endif /* !CONFIG_HOTPLUG_CPU */
/* Wrappers which go away once all code is converted */
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -282,6 +282,21 @@ static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
+/**
+ * pin_current_cpu - Prevent the current cpu from being unplugged
+ */
+void pin_current_cpu(void)
+{
+
+}
+
+/**
+ * unpin_current_cpu - Allow unplug of current cpu
+ */
+void unpin_current_cpu(void)
+{
+}
+
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
void cpus_read_lock(void)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7225,6 +7225,7 @@ void migrate_disable(void)
}
preempt_disable();
+ pin_current_cpu();
migrate_disable_update_cpus_allowed(p);
p->migrate_disable = 1;
@@ -7290,12 +7291,15 @@ void migrate_enable(void)
arg.task = p;
arg.dest_cpu = dest_cpu;
+ unpin_current_cpu();
preempt_enable();
stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
+
return;
}
}
+ unpin_current_cpu();
preempt_enable();
}
EXPORT_SYMBOL(migrate_enable);

View File

@ -0,0 +1,92 @@
From: Yang Shi <yang.shi@windriver.com>
Date: Mon, 16 Sep 2013 14:09:19 -0700
Subject: hrtimer: Move schedule_work call to helper thread
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
When run ltp leapsec_timer test, the following call trace is caught:
BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
Preemption disabled at:[<ffffffff810857f3>] cpu_startup_entry+0x133/0x310
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.10.10-rt3 #2
Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010
ffffffff81c2f800 ffff880076843e40 ffffffff8169918d ffff880076843e58
ffffffff8106db31 ffff88007684b4a0 ffff880076843e70 ffffffff8169d9c0
ffff88007684b4a0 ffff880076843eb0 ffffffff81059da1 0000001876851200
Call Trace:
<IRQ> [<ffffffff8169918d>] dump_stack+0x19/0x1b
[<ffffffff8106db31>] __might_sleep+0xf1/0x170
[<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50
[<ffffffff81059da1>] queue_work_on+0x61/0x100
[<ffffffff81065aa1>] clock_was_set_delayed+0x21/0x30
[<ffffffff810883be>] do_timer+0x40e/0x660
[<ffffffff8108f487>] tick_do_update_jiffies64+0xf7/0x140
[<ffffffff8108fe42>] tick_check_idle+0x92/0xc0
[<ffffffff81044327>] irq_enter+0x57/0x70
[<ffffffff816a040e>] smp_apic_timer_interrupt+0x3e/0x9b
[<ffffffff8169f80a>] apic_timer_interrupt+0x6a/0x70
<EOI> [<ffffffff8155ea1c>] ? cpuidle_enter_state+0x4c/0xc0
[<ffffffff8155eb68>] cpuidle_idle_call+0xd8/0x2d0
[<ffffffff8100b59e>] arch_cpu_idle+0xe/0x30
[<ffffffff8108585e>] cpu_startup_entry+0x19e/0x310
[<ffffffff8168efa2>] start_secondary+0x1ad/0x1b0
The clock_was_set_delayed is called in hard IRQ handler (timer interrupt), which
calls schedule_work.
Under PREEMPT_RT_FULL, schedule_work calls spinlocks which could sleep, so it's
not safe to call schedule_work in interrupt context.
Reference upstream commit b68d61c705ef02384c0538b8d9374545097899ca
(rt,ntp: Move call to schedule_delayed_work() to helper thread)
from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git, which
makes a similar change.
Signed-off-by: Yang Shi <yang.shi@windriver.com>
[bigeasy: use swork_queue() instead a helper thread]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -730,6 +730,29 @@ static void hrtimer_switch_to_hres(void)
retrigger_next_event(NULL);
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+static struct swork_event clock_set_delay_work;
+
+static void run_clock_set_delay(struct swork_event *event)
+{
+ clock_was_set();
+}
+
+void clock_was_set_delayed(void)
+{
+ swork_queue(&clock_set_delay_work);
+}
+
+static __init int create_clock_set_delay_thread(void)
+{
+ WARN_ON(swork_get());
+ INIT_SWORK(&clock_set_delay_work, run_clock_set_delay);
+ return 0;
+}
+early_initcall(create_clock_set_delay_thread);
+#else /* PREEMPT_RT_FULL */
+
static void clock_was_set_work(struct work_struct *work)
{
clock_was_set();
@@ -745,6 +768,7 @@ void clock_was_set_delayed(void)
{
schedule_work(&hrtimer_work);
}
+#endif
#else

View File

@ -0,0 +1,210 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:31 -0500
Subject: hrtimer: by timers by default into the softirq context
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
We can't have hrtimers callbacks running in hardirq context on RT. Therefore
the timers are deferred to the softirq context by default.
There are few timers which expect to be run in hardirq context even on RT.
Those are:
- very short running where low latency is critical (kvm lapic)
- timers which take raw locks and need run in hard-irq context (perf, sched)
- wake up related timer (kernel side of clock_nanosleep() and so on)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/kvm/lapic.c | 2 +-
include/linux/hrtimer.h | 6 ++++++
kernel/events/core.c | 4 ++--
kernel/sched/core.c | 2 +-
kernel/sched/deadline.c | 2 +-
kernel/sched/fair.c | 4 ++--
kernel/sched/rt.c | 4 ++--
kernel/time/hrtimer.c | 21 +++++++++++++++++++--
kernel/time/tick-broadcast-hrtimer.c | 2 +-
kernel/time/tick-sched.c | 2 +-
kernel/watchdog.c | 2 +-
11 files changed, 37 insertions(+), 14 deletions(-)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2176,7 +2176,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS_PINNED_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
/*
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -42,6 +42,7 @@ enum hrtimer_mode {
HRTIMER_MODE_REL = 0x01,
HRTIMER_MODE_PINNED = 0x02,
HRTIMER_MODE_SOFT = 0x04,
+ HRTIMER_MODE_HARD = 0x08,
HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -52,6 +53,11 @@ enum hrtimer_mode {
HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
+ HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+ HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+ HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+ HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
};
/*
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1102,7 +1102,7 @@ static void __perf_mux_hrtimer_init(stru
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
raw_spin_lock_init(&cpuctx->hrtimer_lock);
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
timer->function = perf_mux_hrtimer_handler;
}
@@ -9166,7 +9166,7 @@ static void perf_swevent_init_hrtimer(st
if (!is_sampling_event(event))
return;
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hwc->hrtimer.function = perf_swevent_hrtimer;
/*
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -321,7 +321,7 @@ static void hrtick_rq_init(struct rq *rq
rq->hrtick_csd.info = rq;
#endif
- hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
}
#else /* CONFIG_SCHED_HRTICK */
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1053,7 +1053,7 @@ void init_dl_task_timer(struct sched_dl_
{
struct hrtimer *timer = &dl_se->dl_timer;
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
timer->function = dl_task_timer;
}
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5182,9 +5182,9 @@ void init_cfs_bandwidth(struct cfs_bandw
cfs_b->period = ns_to_ktime(default_cfs_period());
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
cfs_b->period_timer.function = sched_cfs_period_timer;
- hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
}
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -43,8 +43,8 @@ void init_rt_bandwidth(struct rt_bandwid
raw_spin_lock_init(&rt_b->rt_runtime_lock);
- hrtimer_init(&rt_b->rt_period_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_HARD);
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1135,7 +1135,9 @@ void hrtimer_start_range_ns(struct hrtim
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match.
*/
+#ifndef CONFIG_PREEMPT_RT_BASE
WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+#endif
base = lock_hrtimer_base(timer, &flags);
@@ -1295,10 +1297,17 @@ static inline int hrtimer_clockid_to_bas
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
- bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
- int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+ bool softtimer;
+ int base;
struct hrtimer_cpu_base *cpu_base;
+ softtimer = !!(mode & HRTIMER_MODE_SOFT);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!softtimer && !(mode & HRTIMER_MODE_HARD))
+ softtimer = true;
+#endif
+ base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+
memset(timer, 0, sizeof(struct hrtimer));
cpu_base = raw_cpu_ptr(&hrtimer_bases);
@@ -1682,6 +1691,14 @@ static void __hrtimer_init_sleeper(struc
enum hrtimer_mode mode,
struct task_struct *task)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) {
+ if (task_is_realtime(current) || system_state != SYSTEM_RUNNING)
+ mode |= HRTIMER_MODE_HARD;
+ else
+ mode |= HRTIMER_MODE_SOFT;
+ }
+#endif
__hrtimer_init(&sl->timer, clock_id, mode);
sl->timer.function = hrtimer_wakeup;
sl->task = task;
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -106,7 +106,7 @@ static enum hrtimer_restart bc_handler(s
void tick_setup_hrtimer_broadcast(void)
{
- hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
bctimer.function = bc_handler;
clockevents_register_device(&ce_broadcast_hrtimer);
}
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1310,7 +1310,7 @@ void tick_setup_sched_timer(void)
/*
* Emulate tick processing via per-CPU hrtimers:
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per-CPU) */
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -463,7 +463,7 @@ static void watchdog_enable(unsigned int
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
*/
- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hrtimer->function = watchdog_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);

View File

@ -0,0 +1,252 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 4 Sep 2017 18:31:50 +0200
Subject: [PATCH] hrtimer: consolidate hrtimer_init() + hrtimer_init_sleeper() calls
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
hrtimer_init_sleeper() calls require a prior initialisation of the
hrtimer object with hrtimer_init(). Lets make the initialisation of
the hrtimer object part of hrtimer_init_sleeper(). To remain
consistent consider init_on_stack as well.
Beside adapting the hrtimer_init_sleeper[_on_stack]() functions, call
sites need to be updated as well.
[anna-maria: Updating the commit message]
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-mq.c | 3 +--
include/linux/hrtimer.h | 19 ++++++++++++++++---
include/linux/wait.h | 4 ++--
kernel/futex.c | 19 ++++++++-----------
kernel/time/hrtimer.c | 46 ++++++++++++++++++++++++++++++++++++----------
net/core/pktgen.c | 4 ++--
6 files changed, 65 insertions(+), 30 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2982,10 +2982,9 @@ static bool blk_mq_poll_hybrid_sleep(str
kt = nsecs;
mode = HRTIMER_MODE_REL;
- hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+ hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode, current);
hrtimer_set_expires(&hs.timer, kt);
- hrtimer_init_sleeper(&hs, current);
do {
if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
break;
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -364,10 +364,17 @@ DECLARE_PER_CPU(struct tick_device, tick
/* Initialize timers: */
extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+ enum hrtimer_mode mode,
+ struct task_struct *task);
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+ clockid_t clock_id,
+ enum hrtimer_mode mode,
+ struct task_struct *task);
extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
#else
@@ -377,6 +384,15 @@ static inline void hrtimer_init_on_stack
{
hrtimer_init(timer, which_clock, mode);
}
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+ clockid_t clock_id,
+ enum hrtimer_mode mode,
+ struct task_struct *task)
+{
+ hrtimer_init_sleeper(sl, clock_id, mode, task);
+}
+
static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
#endif
@@ -480,9 +496,6 @@ extern long hrtimer_nanosleep(const stru
const enum hrtimer_mode mode,
const clockid_t clockid);
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
- struct task_struct *tsk);
-
extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode);
extern int schedule_hrtimeout_range_clock(ktime_t *expires,
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -489,8 +489,8 @@ do { \
int __ret = 0; \
struct hrtimer_sleeper __t; \
\
- hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \
- hrtimer_init_sleeper(&__t, current); \
+ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, HRTIMER_MODE_REL, \
+ current); \
if ((timeout) != KTIME_MAX) \
hrtimer_start_range_ns(&__t.timer, timeout, \
current->timer_slack_ns, \
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2624,10 +2624,9 @@ static int futex_wait(u32 __user *uaddr,
if (abs_time) {
to = &timeout;
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
+ hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
current->timer_slack_ns);
}
@@ -2726,9 +2725,8 @@ static int futex_lock_pi(u32 __user *uad
if (time) {
to = &timeout;
- hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
+ hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME,
+ HRTIMER_MODE_ABS, current);
hrtimer_set_expires(&to->timer, *time);
}
@@ -3144,10 +3142,9 @@ static int futex_wait_requeue_pi(u32 __u
if (abs_time) {
to = &timeout;
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
+ hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
current->timer_slack_ns);
}
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1649,13 +1649,44 @@ static enum hrtimer_restart hrtimer_wake
return HRTIMER_NORESTART;
}
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+ clockid_t clock_id,
+ enum hrtimer_mode mode,
+ struct task_struct *task)
{
+ __hrtimer_init(&sl->timer, clock_id, mode);
sl->timer.function = hrtimer_wakeup;
sl->task = task;
}
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl: sleeper to be initialized
+ * @clock_id: the clock to be used
+ * @mode: timer mode abs/rel
+ * @task: the task to wake up
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+ enum hrtimer_mode mode, struct task_struct *task)
+{
+ debug_init(&sl->timer, clock_id, mode);
+ __hrtimer_init_sleeper(sl, clock_id, mode, task);
+
+}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+ clockid_t clock_id,
+ enum hrtimer_mode mode,
+ struct task_struct *task)
+{
+ debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+ __hrtimer_init_sleeper(sl, clock_id, mode, task);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+#endif
+
int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
{
switch(restart->nanosleep.type) {
@@ -1679,8 +1710,6 @@ static int __sched do_nanosleep(struct h
{
struct restart_block *restart;
- hrtimer_init_sleeper(t, current);
-
do {
set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start_expires(&t->timer, mode);
@@ -1717,10 +1746,9 @@ static long __sched hrtimer_nanosleep_re
struct hrtimer_sleeper t;
int ret;
- hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
- HRTIMER_MODE_ABS);
+ hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+ HRTIMER_MODE_ABS, current);
hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
destroy_hrtimer_on_stack(&t.timer);
return ret;
@@ -1738,7 +1766,7 @@ long hrtimer_nanosleep(const struct time
if (dl_task(current) || rt_task(current))
slack = 0;
- hrtimer_init_on_stack(&t.timer, clockid, mode);
+ hrtimer_init_sleeper_on_stack(&t, clockid, mode, current);
hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
@@ -1937,11 +1965,9 @@ schedule_hrtimeout_range_clock(ktime_t *
return -EINTR;
}
- hrtimer_init_on_stack(&t.timer, clock_id, mode);
+ hrtimer_init_sleeper_on_stack(&t, clock_id, mode, current);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
- hrtimer_init_sleeper(&t, current);
-
hrtimer_start_expires(&t.timer, mode);
if (likely(t.task))
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2162,7 +2162,8 @@ static void spin(struct pktgen_dev *pkt_
s64 remaining;
struct hrtimer_sleeper t;
- hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+ current);
hrtimer_set_expires(&t.timer, spin_until);
remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2177,7 +2178,6 @@ static void spin(struct pktgen_dev *pkt_
} while (ktime_compare(end_time, spin_until) < 0);
} else {
/* see do_nanosleep */
- hrtimer_init_sleeper(&t, current);
do {
set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);

View File

@ -0,0 +1,273 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:34 -0500
Subject: hrtimers: Prepare full preemption
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18-rc8-rt1.tar.xz
Make cancellation of a running callback in softirq context safe
against preemption.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/timerfd.c | 5 ++++-
include/linux/hrtimer.h | 13 ++++++++++++-
include/linux/posix-timers.h | 2 +-
kernel/time/alarmtimer.c | 2 +-
kernel/time/hrtimer.c | 33 ++++++++++++++++++++++++++++++++-
kernel/time/itimer.c | 1 +
kernel/time/posix-timers.c | 39 +++++++++++++++++++++++++++++++++++++--
7 files changed, 88 insertions(+), 7 deletions(-)
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,7 +471,10 @@ static int do_timerfd_settime(int ufd, i
break;
}
spin_unlock_irq(&ctx->wqh.lock);
- cpu_relax();
+ if (isalarm(ctx))
+ hrtimer_wait_for_timer(&ctx->t.alarm.timer);
+ else
+ hrtimer_wait_for_timer(&ctx->t.tmr);
}
/*
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -22,6 +22,7 @@
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/timerqueue.h>
+#include <linux/wait.h>
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
@@ -216,6 +217,9 @@ struct hrtimer_cpu_base {
ktime_t expires_next;
struct hrtimer *next_timer;
ktime_t softirq_expires_next;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ wait_queue_head_t wait;
+#endif
struct hrtimer *softirq_next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
} ____cacheline_aligned;
@@ -433,6 +437,13 @@ static inline void hrtimer_restart(struc
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
+/* Softirq preemption could deadlock timer removal */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
+#else
+# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
+#endif
+
/* Query timers: */
extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
@@ -458,7 +469,7 @@ static inline int hrtimer_is_queued(stru
* Helper function to check, whether the timer is running the callback
* function
*/
-static inline int hrtimer_callback_running(struct hrtimer *timer)
+static inline int hrtimer_callback_running(const struct hrtimer *timer)
{
return timer->base->running == timer;
}
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -114,8 +114,8 @@ struct k_itimer {
struct {
struct alarm alarmtimer;
} alarm;
- struct rcu_head rcu;
} it;
+ struct rcu_head rcu;
};
void run_posix_cpu_timers(struct task_struct *task);
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -436,7 +436,7 @@ int alarm_cancel(struct alarm *alarm)
int ret = alarm_try_to_cancel(alarm);
if (ret >= 0)
return ret;
- cpu_relax();
+ hrtimer_wait_for_timer(&alarm->timer);
}
}
EXPORT_SYMBOL_GPL(alarm_cancel);
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -939,6 +939,33 @@ u64 hrtimer_forward(struct hrtimer *time
}
EXPORT_SYMBOL_GPL(hrtimer_forward);
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
+
+/**
+ * hrtimer_wait_for_timer - Wait for a running timer
+ *
+ * @timer: timer to wait for
+ *
+ * The function waits in case the timers callback function is
+ * currently executed on the waitqueue of the timer base. The
+ * waitqueue is woken up after the timer callback function has
+ * finished execution.
+ */
+void hrtimer_wait_for_timer(const struct hrtimer *timer)
+{
+ struct hrtimer_clock_base *base = timer->base;
+
+ if (base && base->cpu_base &&
+ base->index >= HRTIMER_BASE_MONOTONIC_SOFT)
+ wait_event(base->cpu_base->wait,
+ !(hrtimer_callback_running(timer)));
+}
+
+#else
+# define wake_up_timer_waiters(b) do { } while (0)
+#endif
+
/*
* enqueue_hrtimer - internal function to (re)start a timer
*
@@ -1171,7 +1198,7 @@ int hrtimer_cancel(struct hrtimer *timer
if (ret >= 0)
return ret;
- cpu_relax();
+ hrtimer_wait_for_timer(timer);
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1477,6 +1504,7 @@ static __latent_entropy void hrtimer_run
hrtimer_update_softirq_timer(cpu_base, true);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ wake_up_timer_waiters(cpu_base);
}
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1847,6 +1875,9 @@ int hrtimers_prepare_cpu(unsigned int cp
cpu_base->softirq_next_timer = NULL;
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ init_waitqueue_head(&cpu_base->wait);
+#endif
return 0;
}
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -214,6 +214,7 @@ int do_setitimer(int which, struct itime
/* We are sharing ->siglock with it_real_fn() */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
+ hrtimer_wait_for_timer(&tsk->signal->real_timer);
goto again;
}
expires = timeval_to_ktime(value->it_value);
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -469,7 +469,7 @@ static struct k_itimer * alloc_posix_tim
static void k_itimer_rcu_free(struct rcu_head *head)
{
- struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+ struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
kmem_cache_free(posix_timers_cache, tmr);
}
@@ -486,7 +486,7 @@ static void release_posix_timer(struct k
}
put_pid(tmr->it_pid);
sigqueue_free(tmr->sigq);
- call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+ call_rcu(&tmr->rcu, k_itimer_rcu_free);
}
static int common_timer_create(struct k_itimer *new_timer)
@@ -825,6 +825,22 @@ static void common_hrtimer_arm(struct k_
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
+/*
+ * Protected by RCU!
+ */
+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr)
+{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (kc->timer_arm == common_hrtimer_arm)
+ hrtimer_wait_for_timer(&timr->it.real.timer);
+ else if (kc == &alarm_clock)
+ hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer);
+ else
+ /* FIXME: Whacky hack for posix-cpu-timers */
+ schedule_timeout(1);
+#endif
+}
+
static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
{
return hrtimer_try_to_cancel(&timr->it.real.timer);
@@ -889,6 +905,7 @@ static int do_timer_settime(timer_t time
if (!timr)
return -EINVAL;
+ rcu_read_lock();
kc = timr->kclock;
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
@@ -897,9 +914,12 @@ static int do_timer_settime(timer_t time
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
+ timer_wait_for_callback(kc, timr);
old_spec64 = NULL; // We already got the old time...
+ rcu_read_unlock();
goto retry;
}
+ rcu_read_unlock();
return error;
}
@@ -981,10 +1001,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t
if (!timer)
return -EINVAL;
+ rcu_read_lock();
if (timer_delete_hook(timer) == TIMER_RETRY) {
unlock_timer(timer, flags);
+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+ timer);
+ rcu_read_unlock();
goto retry_delete;
}
+ rcu_read_unlock();
spin_lock(&current->sighand->siglock);
list_del(&timer->list);
@@ -1010,8 +1035,18 @@ static void itimer_delete(struct k_itime
retry_delete:
spin_lock_irqsave(&timer->it_lock, flags);
+ /* On RT we can race with a deletion */
+ if (!timer->it_signal) {
+ unlock_timer(timer, flags);
+ return;
+ }
+
if (timer_delete_hook(timer) == TIMER_RETRY) {
+ rcu_read_lock();
unlock_timer(timer, flags);
+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+ timer);
+ rcu_read_unlock();
goto retry_delete;
}
list_del(&timer->list);

Some files were not shown because too many files have changed in this diff Show More