Drop RT patch

Bastian Blank 2018-06-19 21:43:05 +02:00
parent 7b7ba382cf
commit 055a1807cb
518 changed files with 0 additions and 75570 deletions


@@ -1,236 +0,0 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:38 +0200
Subject: [PATCH 1/6] ARM: at91: add TCB registers definitions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Add register and bit definitions for the timer counter blocks found on
Atmel ARM SoCs.
Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/soc/at91/atmel_tcb.h | 216 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 216 insertions(+)
create mode 100644 include/soc/at91/atmel_tcb.h
--- /dev/null
+++ b/include/soc/at91/atmel_tcb.h
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018 Microchip */
+
+#ifndef __SOC_ATMEL_TCB_H
+#define __SOC_ATMEL_TCB_H
+
+/* Channel registers */
+#define ATMEL_TC_COFFS(c) ((c) * 0x40)
+#define ATMEL_TC_CCR(c) ATMEL_TC_COFFS(c)
+#define ATMEL_TC_CMR(c) (ATMEL_TC_COFFS(c) + 0x4)
+#define ATMEL_TC_SMMR(c) (ATMEL_TC_COFFS(c) + 0x8)
+#define ATMEL_TC_RAB(c) (ATMEL_TC_COFFS(c) + 0xc)
+#define ATMEL_TC_CV(c) (ATMEL_TC_COFFS(c) + 0x10)
+#define ATMEL_TC_RA(c) (ATMEL_TC_COFFS(c) + 0x14)
+#define ATMEL_TC_RB(c) (ATMEL_TC_COFFS(c) + 0x18)
+#define ATMEL_TC_RC(c) (ATMEL_TC_COFFS(c) + 0x1c)
+#define ATMEL_TC_SR(c) (ATMEL_TC_COFFS(c) + 0x20)
+#define ATMEL_TC_IER(c) (ATMEL_TC_COFFS(c) + 0x24)
+#define ATMEL_TC_IDR(c) (ATMEL_TC_COFFS(c) + 0x28)
+#define ATMEL_TC_IMR(c) (ATMEL_TC_COFFS(c) + 0x2c)
+#define ATMEL_TC_EMR(c) (ATMEL_TC_COFFS(c) + 0x30)
+
+/* Block registers */
+#define ATMEL_TC_BCR 0xc0
+#define ATMEL_TC_BMR 0xc4
+#define ATMEL_TC_QIER 0xc8
+#define ATMEL_TC_QIDR 0xcc
+#define ATMEL_TC_QIMR 0xd0
+#define ATMEL_TC_QISR 0xd4
+#define ATMEL_TC_FMR 0xd8
+#define ATMEL_TC_WPMR 0xe4
+
+/* CCR fields */
+#define ATMEL_TC_CCR_CLKEN BIT(0)
+#define ATMEL_TC_CCR_CLKDIS BIT(1)
+#define ATMEL_TC_CCR_SWTRG BIT(2)
+
+/* Common CMR fields */
+#define ATMEL_TC_CMR_TCLKS_MSK GENMASK(2, 0)
+#define ATMEL_TC_CMR_TCLK(x) (x)
+#define ATMEL_TC_CMR_XC(x) ((x) + 5)
+#define ATMEL_TC_CMR_CLKI BIT(3)
+#define ATMEL_TC_CMR_BURST_MSK GENMASK(5, 4)
+#define ATMEL_TC_CMR_BURST_XC(x) (((x) + 1) << 4)
+#define ATMEL_TC_CMR_WAVE BIT(15)
+
+/* Capture mode CMR fields */
+#define ATMEL_TC_CMR_LDBSTOP BIT(6)
+#define ATMEL_TC_CMR_LDBDIS BIT(7)
+#define ATMEL_TC_CMR_ETRGEDG_MSK GENMASK(9, 8)
+#define ATMEL_TC_CMR_ETRGEDG_NONE (0 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_RISING (1 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_FALLING (2 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_BOTH (3 << 8)
+#define ATMEL_TC_CMR_ABETRG BIT(10)
+#define ATMEL_TC_CMR_CPCTRG BIT(14)
+#define ATMEL_TC_CMR_LDRA_MSK GENMASK(17, 16)
+#define ATMEL_TC_CMR_LDRA_NONE (0 << 16)
+#define ATMEL_TC_CMR_LDRA_RISING (1 << 16)
+#define ATMEL_TC_CMR_LDRA_FALLING (2 << 16)
+#define ATMEL_TC_CMR_LDRA_BOTH (3 << 16)
+#define ATMEL_TC_CMR_LDRB_MSK GENMASK(19, 18)
+#define ATMEL_TC_CMR_LDRB_NONE (0 << 18)
+#define ATMEL_TC_CMR_LDRB_RISING (1 << 18)
+#define ATMEL_TC_CMR_LDRB_FALLING (2 << 18)
+#define ATMEL_TC_CMR_LDRB_BOTH (3 << 18)
+#define ATMEL_TC_CMR_SBSMPLR_MSK GENMASK(22, 20)
+#define ATMEL_TC_CMR_SBSMPLR(x) ((x) << 20)
+
+/* Waveform mode CMR fields */
+#define ATMEL_TC_CMR_CPCSTOP BIT(6)
+#define ATMEL_TC_CMR_CPCDIS BIT(7)
+#define ATMEL_TC_CMR_EEVTEDG_MSK GENMASK(9, 8)
+#define ATMEL_TC_CMR_EEVTEDG_NONE (0 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_RISING (1 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_FALLING (2 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_BOTH (3 << 8)
+#define ATMEL_TC_CMR_EEVT_MSK GENMASK(11, 10)
+#define ATMEL_TC_CMR_EEVT_XC(x) (((x) + 1) << 10)
+#define ATMEL_TC_CMR_ENETRG BIT(12)
+#define ATMEL_TC_CMR_WAVESEL_MSK GENMASK(14, 13)
+#define ATMEL_TC_CMR_WAVESEL_UP (0 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPDOWN (1 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPRC (2 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPDOWNRC (3 << 13)
+#define ATMEL_TC_CMR_ACPA_MSK GENMASK(17, 16)
+#define ATMEL_TC_CMR_ACPA(a) (ATMEL_TC_CMR_ACTION_##a << 16)
+#define ATMEL_TC_CMR_ACPC_MSK GENMASK(19, 18)
+#define ATMEL_TC_CMR_ACPC(a) (ATMEL_TC_CMR_ACTION_##a << 18)
+#define ATMEL_TC_CMR_AEEVT_MSK GENMASK(21, 20)
+#define ATMEL_TC_CMR_AEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 20)
+#define ATMEL_TC_CMR_ASWTRG_MSK GENMASK(23, 22)
+#define ATMEL_TC_CMR_ASWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 22)
+#define ATMEL_TC_CMR_BCPB_MSK GENMASK(25, 24)
+#define ATMEL_TC_CMR_BCPB(a) (ATMEL_TC_CMR_ACTION_##a << 24)
+#define ATMEL_TC_CMR_BCPC_MSK GENMASK(27, 26)
+#define ATMEL_TC_CMR_BCPC(a) (ATMEL_TC_CMR_ACTION_##a << 26)
+#define ATMEL_TC_CMR_BEEVT_MSK GENMASK(29, 28)
+#define ATMEL_TC_CMR_BEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 28)
+#define ATMEL_TC_CMR_BSWTRG_MSK GENMASK(31, 30)
+#define ATMEL_TC_CMR_BSWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 30)
+#define ATMEL_TC_CMR_ACTION_NONE 0
+#define ATMEL_TC_CMR_ACTION_SET 1
+#define ATMEL_TC_CMR_ACTION_CLEAR 2
+#define ATMEL_TC_CMR_ACTION_TOGGLE 3
+
+/* SMMR fields */
+#define ATMEL_TC_SMMR_GCEN BIT(0)
+#define ATMEL_TC_SMMR_DOWN BIT(1)
+
+/* SR/IER/IDR/IMR fields */
+#define ATMEL_TC_COVFS BIT(0)
+#define ATMEL_TC_LOVRS BIT(1)
+#define ATMEL_TC_CPAS BIT(2)
+#define ATMEL_TC_CPBS BIT(3)
+#define ATMEL_TC_CPCS BIT(4)
+#define ATMEL_TC_LDRAS BIT(5)
+#define ATMEL_TC_LDRBS BIT(6)
+#define ATMEL_TC_ETRGS BIT(7)
+#define ATMEL_TC_CLKSTA BIT(16)
+#define ATMEL_TC_MTIOA BIT(17)
+#define ATMEL_TC_MTIOB BIT(18)
+
+/* EMR fields */
+#define ATMEL_TC_EMR_TRIGSRCA_MSK GENMASK(1, 0)
+#define ATMEL_TC_EMR_TRIGSRCA_TIOA 0
+#define ATMEL_TC_EMR_TRIGSRCA_PWMX 1
+#define ATMEL_TC_EMR_TRIGSRCB_MSK GENMASK(5, 4)
+#define ATMEL_TC_EMR_TRIGSRCB_TIOB (0 << 4)
+#define ATMEL_TC_EMR_TRIGSRCB_PWM (1 << 4)
+#define ATMEL_TC_EMR_NOCLKDIV BIT(8)
+
+/* BCR fields */
+#define ATMEL_TC_BCR_SYNC BIT(0)
+
+/* BMR fields */
+#define ATMEL_TC_BMR_TCXC_MSK(c) GENMASK(((c) * 2) + 1, (c) * 2)
+#define ATMEL_TC_BMR_TCXC(x, c) ((x) << (2 * (c)))
+#define ATMEL_TC_BMR_QDEN BIT(8)
+#define ATMEL_TC_BMR_POSEN BIT(9)
+#define ATMEL_TC_BMR_SPEEDEN BIT(10)
+#define ATMEL_TC_BMR_QDTRANS BIT(11)
+#define ATMEL_TC_BMR_EDGPHA BIT(12)
+#define ATMEL_TC_BMR_INVA BIT(13)
+#define ATMEL_TC_BMR_INVB BIT(14)
+#define ATMEL_TC_BMR_INVIDX BIT(15)
+#define ATMEL_TC_BMR_SWAP BIT(16)
+#define ATMEL_TC_BMR_IDXPHB BIT(17)
+#define ATMEL_TC_BMR_AUTOC BIT(18)
+#define ATMEL_TC_MAXFILT_MSK GENMASK(25, 20)
+#define ATMEL_TC_MAXFILT(x) (((x) - 1) << 20)
+#define ATMEL_TC_MAXCMP_MSK GENMASK(29, 26)
+#define ATMEL_TC_MAXCMP(x) ((x) << 26)
+
+/* QEDC fields */
+#define ATMEL_TC_QEDC_IDX BIT(0)
+#define ATMEL_TC_QEDC_DIRCHG BIT(1)
+#define ATMEL_TC_QEDC_QERR BIT(2)
+#define ATMEL_TC_QEDC_MPE BIT(3)
+#define ATMEL_TC_QEDC_DIR BIT(8)
+
+/* FMR fields */
+#define ATMEL_TC_FMR_ENCF(x) BIT(x)
+
+/* WPMR fields */
+#define ATMEL_TC_WPMR_WPKEY (0x54494d << 8)
+#define ATMEL_TC_WPMR_WPEN BIT(0)
+
+static inline struct clk *tcb_clk_get(struct device_node *node, int channel)
+{
+ struct clk *clk;
+ char clk_name[] = "t0_clk";
+
+ clk_name[1] += channel;
+ clk = of_clk_get_by_name(node->parent, clk_name);
+ if (!IS_ERR(clk))
+ return clk;
+
+ return of_clk_get_by_name(node->parent, "t0_clk");
+}
+
+static inline int tcb_irq_get(struct device_node *node, int channel)
+{
+ int irq;
+
+ irq = of_irq_get(node->parent, channel);
+ if (irq > 0)
+ return irq;
+
+ return of_irq_get(node->parent, 0);
+}
+
+static const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+
+struct atmel_tcb_info {
+ int bits;
+};
+
+static const struct atmel_tcb_info atmel_tcb_infos[] = {
+ { .bits = 16 },
+ { .bits = 32 },
+};
+
+static const struct of_device_id atmel_tcb_dt_ids[] = {
+ {
+ .compatible = "atmel,at91rm9200-tcb",
+ .data = &atmel_tcb_infos[0],
+ }, {
+ .compatible = "atmel,at91sam9x5-tcb",
+ .data = &atmel_tcb_infos[1],
+ }, {
+ /* sentinel */
+ }
+};
+
+#endif /* __SOC_ATMEL_TCB_H */
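
For orientation only (not part of the patch), the sketch below shows how a driver
might combine the channel-register macros and CMR field helpers defined above.
The base pointer, channel index and function name are hypothetical; the values
mirror the periodic-mode setup used by the clockevent code added later in this
series.

#include <linux/io.h>
#include <soc/at91/atmel_tcb.h>

/* Program channel @ch of an already-mapped TCB block to count the slow clock
 * up to RC and raise an interrupt on each RC compare (illustrative only). */
static void tcb_example_start_periodic(void __iomem *base, int ch, u32 period)
{
	writel(ATMEL_TC_CMR_TCLK(4) |		/* TIMER_CLOCK5: slow clock */
	       ATMEL_TC_CMR_WAVE |		/* waveform mode */
	       ATMEL_TC_CMR_WAVESEL_UPRC,	/* count up, reset on RC match */
	       base + ATMEL_TC_CMR(ch));
	writel(period, base + ATMEL_TC_RC(ch));
	writel(ATMEL_TC_CPCS, base + ATMEL_TC_IER(ch));	/* irq on RC compare */
	writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,	/* enable and trigger */
	       base + ATMEL_TC_CCR(ch));
}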


@@ -1,103 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 7 May 2018 16:51:09 +0200
Subject: [PATCH] bdi: use refcount_t for reference counting instead atomic_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The refcount_t type and its corresponding API should be used instead of
atomic_t when a variable is used as a reference counter. This helps avoid
accidental reference-counter overflows that might lead to use-after-free
situations.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/backing-dev-defs.h | 3 ++-
include/linux/backing-dev.h | 4 ++--
mm/backing-dev.c | 12 ++++++------
3 files changed, 10 insertions(+), 9 deletions(-)
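
Before the diff, a minimal self-contained illustration of the conversion
pattern described above (hypothetical "foo" object, not code from this patch).
Note also the hunk below that switches the initial value from 0 to 1: unlike
atomic_t, a refcount_t must never be incremented from zero, so the object
starts life holding one reference.

#include <linux/refcount.h>
#include <linux/slab.h>

struct foo {
	refcount_t refcnt;			/* was: atomic_t refcnt */
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		refcount_set(&f->refcnt, 1);	/* was: atomic_set(..., 0) + inc later */
	return f;
}

static void foo_get(struct foo *f)
{
	refcount_inc(&f->refcnt);		/* was: atomic_inc(); warns on 0/overflow */
}

static void foo_put(struct foo *f)
{
	if (refcount_dec_and_test(&f->refcnt))	/* was: atomic_dec_and_test() */
		kfree(f);
}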
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/refcount.h>
struct page;
struct device;
@@ -76,7 +77,7 @@ enum wb_reason {
*/
struct bdi_writeback_congested {
unsigned long state; /* WB_[a]sync_congested flags */
- atomic_t refcnt; /* nr of attached wb's and blkg */
+ refcount_t refcnt; /* nr of attached wb's and blkg */
#ifdef CONFIG_CGROUP_WRITEBACK
struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -403,13 +403,13 @@ static inline bool inode_cgwb_enabled(st
static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
- atomic_inc(&bdi->wb_congested->refcnt);
+ refcount_inc(&bdi->wb_congested->refcnt);
return bdi->wb_congested;
}
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
- if (atomic_dec_and_test(&congested->refcnt))
+ if (refcount_dec_and_test(&congested->refcnt))
kfree(congested);
}
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -460,10 +460,10 @@ wb_congested_get_create(struct backing_d
if (new_congested) {
/* !found and storage for new one already allocated, insert */
congested = new_congested;
- new_congested = NULL;
rb_link_node(&congested->rb_node, parent, node);
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
- goto found;
+ spin_unlock_irqrestore(&cgwb_lock, flags);
+ return congested;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -473,13 +473,13 @@ wb_congested_get_create(struct backing_d
if (!new_congested)
return NULL;
- atomic_set(&new_congested->refcnt, 0);
+ refcount_set(&new_congested->refcnt, 1);
new_congested->__bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;
found:
- atomic_inc(&congested->refcnt);
+ refcount_inc(&congested->refcnt);
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(new_congested);
return congested;
@@ -496,7 +496,7 @@ void wb_congested_put(struct bdi_writeba
unsigned long flags;
local_irq_save(flags);
- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
+ if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
local_irq_restore(flags);
return;
}
@@ -806,7 +806,7 @@ static int cgwb_bdi_init(struct backing_
if (!bdi->wb_congested)
return -ENOMEM;
- atomic_set(&bdi->wb_congested->refcnt, 1);
+ refcount_set(&bdi->wb_congested->refcnt, 1);
err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {


@@ -1,153 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 21:54:18 -0500
Subject: [PATCH 01/17] get rid of trylock loop in locking dentries on shrink
list
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 3b3f09f48ba78c0634e929849860a6447d057eed
In case of trylock failure don't re-add to the list - drop the locks
and carefully get them in the right order. For shrink_dentry_list(),
somebody having grabbed a reference to dentry means that we can
kick it off-list, so if we find dentry being modified under us we
don't need to play silly buggers with retries anyway - off the list
it is.
The locking logic is taken out into a helper of its own; lock_parent()
is no longer used for dentries that can be killed under us.
[fix from Eric Biggers folded]
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 106 ++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 68 insertions(+), 38 deletions(-)
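
Before the diff, a stripped-down sketch of the "trylock, else drop and re-take
in the right order" pattern that shrink_lock_dentry() is built around (plain
spinlocks and hypothetical names, not the dentry fields themselves). In the
real helper the slow path additionally holds rcu_read_lock() so the parent or
inode cannot be freed while the dentry lock is dropped, and it rechecks
d_lockref.count, d_inode and d_parent before declaring success.

#include <linux/spinlock.h>

/* @child is already locked; the documented order is parent before child.
 * Returns true on the trylock fast path; false means both locks were
 * dropped and re-taken, so the caller must revalidate what it read. */
static bool lock_parent_carefully(spinlock_t *parent, spinlock_t *child)
{
	if (spin_trylock(parent))
		return true;

	spin_unlock(child);
	spin_lock(parent);
	spin_lock_nested(child, SINGLE_DEPTH_NESTING);
	return false;
}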
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -974,56 +974,86 @@ void d_prune_aliases(struct inode *inode
}
EXPORT_SYMBOL(d_prune_aliases);
-static void shrink_dentry_list(struct list_head *list)
+/*
+ * Lock a dentry from shrink list.
+ * Note that dentry is *not* protected from concurrent dentry_kill(),
+ * d_delete(), etc. It is protected from freeing (by the fact of
+ * being on a shrink list), but everything else is fair game.
+ * Return false if dentry has been disrupted or grabbed, leaving
+ * the caller to kick it off-list. Otherwise, return true and have
+ * that dentry's inode and parent both locked.
+ */
+static bool shrink_lock_dentry(struct dentry *dentry)
{
- struct dentry *dentry, *parent;
+ struct inode *inode;
+ struct dentry *parent;
+
+ if (dentry->d_lockref.count)
+ return false;
+ inode = dentry->d_inode;
+ if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
+ rcu_read_lock(); /* to protect inode */
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ if (unlikely(dentry->d_lockref.count))
+ goto out;
+ /* changed inode means that somebody had grabbed it */
+ if (unlikely(inode != dentry->d_inode))
+ goto out;
+ rcu_read_unlock();
+ }
+
+ parent = dentry->d_parent;
+ if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
+ return true;
+
+ rcu_read_lock(); /* to protect parent */
+ spin_unlock(&dentry->d_lock);
+ parent = READ_ONCE(dentry->d_parent);
+ spin_lock(&parent->d_lock);
+ if (unlikely(parent != dentry->d_parent)) {
+ spin_unlock(&parent->d_lock);
+ spin_lock(&dentry->d_lock);
+ goto out;
+ }
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (likely(!dentry->d_lockref.count)) {
+ rcu_read_unlock();
+ return true;
+ }
+ spin_unlock(&parent->d_lock);
+out:
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ return false;
+}
+
+static void shrink_dentry_list(struct list_head *list)
+{
while (!list_empty(list)) {
+ struct dentry *dentry, *parent;
struct inode *inode;
+
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
- parent = lock_parent(dentry);
-
- /*
- * The dispose list is isolated and dentries are not accounted
- * to the LRU here, so we can simply remove it from the list
- * here regardless of whether it is referenced or not.
- */
- d_shrink_del(dentry);
-
- /*
- * We found an inuse dentry which was not removed from
- * the LRU because of laziness during lookup. Do not free it.
- */
- if (dentry->d_lockref.count > 0) {
+ if (!shrink_lock_dentry(dentry)) {
+ bool can_free = false;
+ d_shrink_del(dentry);
+ if (dentry->d_lockref.count < 0)
+ can_free = dentry->d_flags & DCACHE_MAY_FREE;
spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- continue;
- }
-
-
- if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
- bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
if (can_free)
dentry_free(dentry);
continue;
}
-
- inode = dentry->d_inode;
- if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
- d_shrink_add(dentry, list);
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- continue;
- }
-
+ d_shrink_del(dentry);
+ parent = dentry->d_parent;
__dentry_kill(dentry);
-
+ if (parent == dentry)
+ continue;
/*
* We need to prune ancestors too. This is necessary to prevent
* quadratic behavior of shrink_dcache_parent(), but is also


@@ -1,33 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:33 +0100
Subject: [PATCH 01/10] iommu/amd: Take into account that alloc_dev_data() may
return NULL
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 39ffe39545cd5cb5b8cee9f0469165cf24dc62c2
find_dev_data() does not check whether the return value of alloc_dev_data()
is NULL. This used to be okay because the pointer was simply returned as-is.
Since commit df3f7a6e8e85 ("iommu/amd: Use is_attach_deferred
call-back") the pointer may be used within find_dev_data() so a NULL
check is required.
Cc: Baoquan He <bhe@redhat.com>
Fixes: df3f7a6e8e85 ("iommu/amd: Use is_attach_deferred call-back")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -310,6 +310,8 @@ static struct iommu_dev_data *find_dev_d
if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
+ if (!dev_data)
+ return NULL;
if (translation_pre_enabled(iommu))
dev_data->defer_attach = true;


@@ -1,171 +0,0 @@
From: Scott Wood <swood@redhat.com>
Date: Sun, 21 Jan 2018 03:28:54 -0600
Subject: [PATCH 1/3] iommu/amd: Use raw locks on atomic context paths
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 27790398c2aed917828dc3c6f81240d57f1584c9
Several functions in this driver are called from atomic context,
and thus raw locks must be used in order to be safe on PREEMPT_RT.
This includes paths that must wait for command completion, which is
a potential PREEMPT_RT latency concern but not easily avoidable.
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 30 +++++++++++++++---------------
drivers/iommu/amd_iommu_init.c | 2 +-
drivers/iommu/amd_iommu_types.h | 4 ++--
3 files changed, 18 insertions(+), 18 deletions(-)
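
Before the diff, a minimal sketch of the general conversion (hypothetical lock
and function names): on PREEMPT_RT a spinlock_t becomes a sleeping,
rtmutex-based lock, so code that runs in truly atomic context must use
raw_spinlock_t, which always busy-waits.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(cmd_lock);		/* was: DEFINE_SPINLOCK(cmd_lock) */

static void queue_command_from_atomic_context(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&cmd_lock, flags);	/* never sleeps, even on RT */
	/* ... poke the hardware command ring ... */
	raw_spin_unlock_irqrestore(&cmd_lock, flags);
}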
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1056,9 +1056,9 @@ static int iommu_queue_command_sync(stru
unsigned long flags;
int ret;
- spin_lock_irqsave(&iommu->lock, flags);
+ raw_spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command_sync(iommu, cmd, sync);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
@@ -1084,7 +1084,7 @@ static int iommu_completion_wait(struct
build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
- spin_lock_irqsave(&iommu->lock, flags);
+ raw_spin_lock_irqsave(&iommu->lock, flags);
iommu->cmd_sem = 0;
@@ -1095,7 +1095,7 @@ static int iommu_completion_wait(struct
ret = wait_on_sem(&iommu->cmd_sem);
out_unlock:
- spin_unlock_irqrestore(&iommu->lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
@@ -3627,7 +3627,7 @@ static struct irq_remap_table *get_irq_t
goto out_unlock;
/* Initialize table spin-lock */
- spin_lock_init(&table->lock);
+ raw_spin_lock_init(&table->lock);
if (ioapic)
/* Keep the first 32 indexes free for IOAPIC interrupts */
@@ -3689,7 +3689,7 @@ static int alloc_irq_index(u16 devid, in
if (align)
alignment = roundup_pow_of_two(count);
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
/* Scan table for free entries */
for (index = ALIGN(table->min_index, alignment), c = 0;
@@ -3716,7 +3716,7 @@ static int alloc_irq_index(u16 devid, in
index = -ENOSPC;
out:
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
return index;
}
@@ -3737,7 +3737,7 @@ static int modify_irte_ga(u16 devid, int
if (!table)
return -ENOMEM;
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
entry = (struct irte_ga *)table->table;
entry = &entry[index];
@@ -3748,7 +3748,7 @@ static int modify_irte_ga(u16 devid, int
if (data)
data->ref = entry;
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -3770,9 +3770,9 @@ static int modify_irte(u16 devid, int in
if (!table)
return -ENOMEM;
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
table->table[index] = irte->val;
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -3794,9 +3794,9 @@ static void free_irte(u16 devid, int ind
if (!table)
return;
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
iommu->irte_ops->clear_allocated(table, index);
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -4397,7 +4397,7 @@ int amd_iommu_update_ga(int cpu, bool is
if (!irt)
return -ENODEV;
- spin_lock_irqsave(&irt->lock, flags);
+ raw_spin_lock_irqsave(&irt->lock, flags);
if (ref->lo.fields_vapic.guest_mode) {
if (cpu >= 0)
@@ -4406,7 +4406,7 @@ int amd_iommu_update_ga(int cpu, bool is
barrier();
}
- spin_unlock_irqrestore(&irt->lock, flags);
+ raw_spin_unlock_irqrestore(&irt->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct
{
int ret;
- spin_lock_init(&iommu->lock);
+ raw_spin_lock_init(&iommu->lock);
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -408,7 +408,7 @@ extern bool amd_iommu_iotlb_sup;
#define IRQ_TABLE_ALIGNMENT 128
struct irq_remap_table {
- spinlock_t lock;
+ raw_spinlock_t lock;
unsigned min_index;
u32 *table;
};
@@ -490,7 +490,7 @@ struct amd_iommu {
int index;
/* locks the accesses to the hardware */
- spinlock_t lock;
+ raw_spinlock_t lock;
/* Pointer to PCI device of this IOMMU */
struct pci_dev *dev;


@@ -1,273 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:50 +0100
Subject: [PATCH 01/29] timers: Use static keys for migrate_enable/nohz_active
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The members migration_enabled and nohz_active in the timer/hrtimer per-CPU
bases have been introduced to avoid accessing global variables for these
decisions.
Still that results in a (cache hot) load and conditional branch, which can
be avoided by using static keys.
Implement it with static keys and optimize for the most critical case of
high performance networking which tends to disable the timer migration
functionality.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 4 --
kernel/time/hrtimer.c | 17 ++-------
kernel/time/tick-internal.h | 19 ++++++----
kernel/time/tick-sched.c | 2 -
kernel/time/timer.c | 83 ++++++++++++++++++++++----------------------
5 files changed, 61 insertions(+), 64 deletions(-)
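
Before the diff, a minimal sketch of the static-key idiom this patch applies
(hypothetical key name; the real keys are timers_nohz_active and
timers_migration_enabled). The branch site compiles to a patched NOP while the
key is disabled, so the hot path pays neither the per-CPU load nor the
conditional branch.

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(feature_key);

static void feature_enable_slow_path(void)
{
	static_branch_enable(&feature_key);	/* rewrites all branch sites */
}

static void hot_path(void)
{
	if (static_branch_unlikely(&feature_key)) {
		/* rarely enabled work goes here */
	}
}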
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -153,8 +153,6 @@ enum hrtimer_base_type {
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
* @clock_was_set_seq: Sequence counter of clock was set events
- * @migration_enabled: The migration of hrtimers to other cpus is enabled
- * @nohz_active: The nohz functionality is enabled
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
* @next_timer: Pointer to the first expiring timer
@@ -178,8 +176,6 @@ struct hrtimer_cpu_base {
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
- bool migration_enabled;
- bool nohz_active;
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int in_hrtirq : 1,
hres_active : 1,
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -178,23 +178,16 @@ hrtimer_check_target(struct hrtimer *tim
#endif
}
-#ifdef CONFIG_NO_HZ_COMMON
-static inline
-struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
- int pinned)
-{
- if (pinned || !base->migration_enabled)
- return base;
- return &per_cpu(hrtimer_bases, get_nohz_timer_target());
-}
-#else
static inline
struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
int pinned)
{
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+ if (static_branch_unlikely(&timers_migration_enabled) && !pinned)
+ return &per_cpu(hrtimer_bases, get_nohz_timer_target());
+#endif
return base;
}
-#endif
/*
* We switch the timer base to a power-optimized selected CPU target,
@@ -971,7 +964,7 @@ void hrtimer_start_range_ns(struct hrtim
* Kick to reschedule the next tick to handle the new timer
* on dynticks target.
*/
- if (new_base->cpu_base->nohz_active)
+ if (is_timers_nohz_active())
wake_up_nohz_cpu(new_base->cpu_base->cpu);
} else {
hrtimer_reprogram(timer, new_base);
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -150,14 +150,19 @@ static inline void tick_nohz_init(void)
#ifdef CONFIG_NO_HZ_COMMON
extern unsigned long tick_nohz_active;
-#else
+extern void timers_update_nohz(void);
+extern struct static_key_false timers_nohz_active;
+static inline bool is_timers_nohz_active(void)
+{
+ return static_branch_unlikely(&timers_nohz_active);
+}
+# ifdef CONFIG_SMP
+extern struct static_key_false timers_migration_enabled;
+# endif
+#else /* CONFIG_NO_HZ_COMMON */
+static inline void timers_update_nohz(void) { }
#define tick_nohz_active (0)
-#endif
-
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void timers_update_migration(bool update_nohz);
-#else
-static inline void timers_update_migration(bool update_nohz) { }
+static inline bool is_timers_nohz_active(void) { return false; }
#endif
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1131,7 +1131,7 @@ static inline void tick_nohz_activate(st
ts->nohz_mode = mode;
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
- timers_update_migration(true);
+ timers_update_nohz();
}
/**
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -200,8 +200,6 @@ struct timer_base {
unsigned long clk;
unsigned long next_expiry;
unsigned int cpu;
- bool migration_enabled;
- bool nohz_active;
bool is_idle;
bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
@@ -210,45 +208,59 @@ struct timer_base {
static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+#ifdef CONFIG_NO_HZ_COMMON
+
+DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
+static DEFINE_MUTEX(timer_keys_mutex);
+
+static void timer_update_keys(struct work_struct *work);
+static DECLARE_WORK(timer_update_work, timer_update_keys);
+
+#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;
-void timers_update_migration(bool update_nohz)
+DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
+
+static void timers_update_migration(void)
{
bool on = sysctl_timer_migration && tick_nohz_active;
- unsigned int cpu;
- /* Avoid the loop, if nothing to update */
- if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on)
- return;
+ if (on)
+ static_branch_enable(&timers_migration_enabled);
+ else
+ static_branch_disable(&timers_migration_enabled);
+}
+#else
+static inline void timers_update_migration(void) { }
+#endif /* !CONFIG_SMP */
- for_each_possible_cpu(cpu) {
- per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on;
- per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on;
- per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
- if (!update_nohz)
- continue;
- per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true;
- per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true;
- per_cpu(hrtimer_bases.nohz_active, cpu) = true;
- }
+static void timer_update_keys(struct work_struct *work)
+{
+ mutex_lock(&timer_keys_mutex);
+ timers_update_migration();
+ static_branch_enable(&timers_nohz_active);
+ mutex_unlock(&timer_keys_mutex);
+}
+
+void timers_update_nohz(void)
+{
+ schedule_work(&timer_update_work);
}
int timer_migration_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- static DEFINE_MUTEX(mutex);
int ret;
- mutex_lock(&mutex);
+ mutex_lock(&timer_keys_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
- timers_update_migration(false);
- mutex_unlock(&mutex);
+ timers_update_migration();
+ mutex_unlock(&timer_keys_mutex);
return ret;
}
-#endif
+#endif /* NO_HZ_COMMON */
static unsigned long round_jiffies_common(unsigned long j, int cpu,
bool force_up)
@@ -534,7 +546,7 @@ static void
static void
trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
{
- if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+ if (!is_timers_nohz_active())
return;
/*
@@ -840,21 +852,20 @@ static inline struct timer_base *get_tim
return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}
-#ifdef CONFIG_NO_HZ_COMMON
static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
-#ifdef CONFIG_SMP
- if ((tflags & TIMER_PINNED) || !base->migration_enabled)
- return get_timer_this_cpu_base(tflags);
- return get_timer_cpu_base(tflags, get_nohz_timer_target());
-#else
- return get_timer_this_cpu_base(tflags);
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+ if (static_branch_unlikely(&timers_migration_enabled) &&
+ !(tflags & TIMER_PINNED))
+ return get_timer_cpu_base(tflags, get_nohz_timer_target());
#endif
+ return get_timer_this_cpu_base(tflags);
}
static inline void forward_timer_base(struct timer_base *base)
{
+#ifdef CONFIG_NO_HZ_COMMON
unsigned long jnow;
/*
@@ -878,16 +889,8 @@ static inline void forward_timer_base(st
base->clk = jnow;
else
base->clk = base->next_expiry;
-}
-#else
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
- return get_timer_this_cpu_base(tflags);
-}
-
-static inline void forward_timer_base(struct timer_base *base) { }
#endif
+}
/*


@@ -1,50 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 16 May 2018 09:36:43 -0400
Subject: [PATCH 1/5] tracing: Add field modifier parsing hist error for hist
triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
From: Tom Zanussi <tom.zanussi@linux.intel.com>
[ commit dcf234577cd31fa16874e828b90659166ad6b80d ]
If the user specifies an invalid field modifier for a hist trigger,
the current code correctly flags that as an error, but doesn't tell
the user what happened.
Fix this by invoking hist_err() with an appropriate message when
invalid modifiers are specified.
Before:
# echo 'hist:keys=pid:ts0=common_timestamp.junkusecs' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
-su: echo: write error: Invalid argument
# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
After:
# echo 'hist:keys=pid:ts0=common_timestamp.junkusecs' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
-su: echo: write error: Invalid argument
# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
ERROR: Invalid field modifier: junkusecs
Last command: keys=pid:ts0=common_timestamp.junkusecs
Link: http://lkml.kernel.org/r/b043c59fa79acd06a5f14a1d44dee9e5a3cd1248.1524790601.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2466,6 +2466,7 @@ parse_field(struct hist_trigger_data *hi
else if (strcmp(modifier, "usecs") == 0)
*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
+ hist_err("Invalid field modifier: ", modifier);
field = ERR_PTR(-EINVAL);
goto out;
}


@@ -1,128 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 22 Sep 2017 14:58:15 -0500
Subject: [PATCH 01/42] tracing: Steve's unofficial trace_recursive_lock()
patch
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
On Tue, 5 Sep 2017 16:57:52 -0500
Tom Zanussi <tom.zanussi@linux.intel.com> wrote:
> Synthetic event generation requires the reservation of a second event
> while the reservation of a previous event is still in progress. The
> trace_recursive_lock() check in ring_buffer_lock_reserve() prevents
> this however.
>
> This sets up a special reserve pathway for this particular case,
> leaving existing pathways untouched, other than an additional check in
> ring_buffer_lock_reserve() and trace_event_buffer_reserve(). These
> checks could be gotten rid of as well, with copies of those functions,
> but for now try to avoid that unless necessary.
>
> Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
I've been planning on changing that lock, which may help you here
without having to mess around with parameters. That is to simply add a
counter. Would this patch help you? You can add a patch to increment
the count to 5 with an explanation of handling synthetic events, but
even getting to 4 is extremely unlikely.
I'll make this into an official patch if this works for you, and then
you can include it in your series.
-- Steve
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/ring_buffer.c | 66 ++++++++++++---------------------------------
1 file changed, 18 insertions(+), 48 deletions(-)
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2542,61 +2542,29 @@ rb_wakeups(struct ring_buffer *buffer, s
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. To pass this
- * information from the lock to the unlock without having to
- * access the 'in_interrupt()' functions again (which do show
- * a bit of overhead in something as critical as function tracing,
- * we use a bitmask trick.
+ * be modified more than once via an interrupt. There are four
+ * different contexts that we need to consider.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
- *
- * This works because this is the order of contexts that can
- * preempt other contexts. A SoftIRQ never preempts an IRQ
- * context.
- *
- * When the context is determined, the corresponding bit is
- * checked and set (if it was set, then a recursion of that context
- * happened).
- *
- * On unlock, we need to clear this bit. To do so, just subtract
- * 1 from the current_context and AND it to itself.
- *
- * (binary)
- * 101 - 1 = 100
- * 101 & 100 = 100 (clearing bit zero)
- *
- * 1010 - 1 = 1001
- * 1010 & 1001 = 1000 (clearing bit 1)
- *
- * The least significant bit can be cleared this way, and it
- * just so happens that it is the same bit corresponding to
- * the current context.
+ * Normal context.
+ * SoftIRQ context
+ * IRQ context
+ * NMI context
+ *
+ * If for some reason the ring buffer starts to recurse, we
+ * only allow that to happen at most 4 times (one for each
+ * context). If it happens 5 times, then we consider this a
+ * recursive loop and do not let it go further.
*/
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
- unsigned int val = cpu_buffer->current_context;
- int bit;
-
- if (in_interrupt()) {
- if (in_nmi())
- bit = RB_CTX_NMI;
- else if (in_irq())
- bit = RB_CTX_IRQ;
- else
- bit = RB_CTX_SOFTIRQ;
- } else
- bit = RB_CTX_NORMAL;
-
- if (unlikely(val & (1 << bit)))
+ if (cpu_buffer->current_context >= 4)
return 1;
- val |= (1 << bit);
- cpu_buffer->current_context = val;
+ cpu_buffer->current_context++;
+ /* Interrupts must see this update */
+ barrier();
return 0;
}
@@ -2604,7 +2572,9 @@ trace_recursive_lock(struct ring_buffer_
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
- cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+ /* Don't let the dec leak out */
+ barrier();
+ cpu_buffer->current_context--;
}
/**


@@ -1,672 +0,0 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:39 +0200
Subject: [PATCH 2/6] clocksource/drivers: Add a new driver for the Atmel ARM
TC blocks
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Add a driver for the Atmel Timer Counter Blocks. This driver provides a
clocksource and two clockevent devices.
One of the clockevent devices is linked to the clocksource counter and so it
will run at the same frequency. This will be used when there is only one TCB
channel available for timers.
The other clockevent device runs on a separate TCB channel when available.
This driver uses regmap and syscon to be able to probe early in the boot
and avoid having to switch on the TCB clocksource later. Using regmap also
means that unused TCB channels may be used by other drivers (PWM for
example). readl()/writel() are still used to access channel-specific registers
to avoid the performance impact of regmap (mainly locking).
Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 8
drivers/clocksource/Makefile | 3
drivers/clocksource/timer-atmel-tcb.c | 608 ++++++++++++++++++++++++++++++++++
3 files changed, 618 insertions(+), 1 deletion(-)
create mode 100644 drivers/clocksource/timer-atmel-tcb.c
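
Before the diff, a condensed illustration of the register-access split
described above (hypothetical probe fragment; the complete logic is in
tcb_clksrc_init() below): block-wide registers are arbitrated through the
syscon regmap so other drivers can share the TCB, while channel-private
registers keep direct MMIO.

#include <linux/err.h>
#include <linux/io.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/regmap.h>
#include <soc/at91/atmel_tcb.h>

static int __init tcb_access_split_example(struct device_node *node,
					    void __iomem *base)
{
	struct regmap *regmap = syscon_node_to_regmap(node->parent);

	if (IS_ERR(regmap))
		return PTR_ERR(regmap);

	/* shared block register: goes through the locked regmap */
	regmap_write(regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);

	/* channel-private register: direct, unlocked MMIO */
	writel(ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UP,
	       base + ATMEL_TC_CMR(0));

	return 0;
}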
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -392,6 +392,14 @@ config ATMEL_ST
help
Support for the Atmel ST timer.
+config ATMEL_ARM_TCB_CLKSRC
+ bool "Microchip ARM TC Block" if COMPILE_TEST
+ select REGMAP_MMIO
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated Timer Counter Blocks in Microchip ARM SoCs.
+
config CLKSRC_METAG_GENERIC
def_bool y if METAG
help
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o
obj-$(CONFIG_TIMER_PROBE) += timer-probe.o
obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o
obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o
-obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
--- /dev/null
+++ b/drivers/clocksource/timer-atmel-tcb.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+#include <linux/sched_clock.h>
+#include <soc/at91/atmel_tcb.h>
+
+static struct atmel_tcb_clksrc {
+ struct clocksource clksrc;
+ struct clock_event_device clkevt;
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *clk[2];
+ char name[20];
+ int channels[2];
+ int bits;
+ int irq;
+ struct {
+ u32 cmr;
+ u32 imr;
+ u32 rc;
+ bool clken;
+ } cache[2];
+ u32 bmr_cache;
+ bool registered;
+} tc = {
+ .clksrc = {
+ .rating = 200,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+ .clkevt = {
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ /* Should be lower than at91rm9200's system timer */
+ .rating = 125,
+ },
+};
+
+static struct tc_clkevt_device {
+ struct clock_event_device clkevt;
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *slow_clk;
+ struct clk *clk;
+ char name[20];
+ int channel;
+ int irq;
+ struct {
+ u32 cmr;
+ u32 imr;
+ u32 rc;
+ bool clken;
+ } cache;
+ bool registered;
+} tce = {
+ .clkevt = {
+ .features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT,
+ /*
+ * Should be lower than at91rm9200's system timer
+ * but higher than tc.clkevt.rating
+ */
+ .rating = 140,
+ },
+};
+
+/*
+ * Clockevent device using its own channel
+ */
+static int tc_clkevt2_shutdown(struct clock_event_device *d)
+{
+ writel(0xff, tce.base + ATMEL_TC_IDR(tce.channel));
+ writel(ATMEL_TC_CCR_CLKDIS, tce.base + ATMEL_TC_CCR(tce.channel));
+ if (!clockevent_state_detached(d))
+ clk_disable(tce.clk);
+
+ return 0;
+}
+
+/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
+ * because using one of the divided clocks would usually mean the
+ * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
+ *
+ * A divided clock could be good for high resolution timers, since
+ * 30.5 usec resolution can seem "low".
+ */
+static int tc_clkevt2_set_oneshot(struct clock_event_device *d)
+{
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
+ tc_clkevt2_shutdown(d);
+
+ clk_enable(tce.clk);
+
+ /* slow clock, count up to RC, then irq and stop */
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_CPCSTOP |
+ ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UPRC,
+ tce.base + ATMEL_TC_CMR(tce.channel));
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel));
+
+ return 0;
+}
+
+static int tc_clkevt2_set_periodic(struct clock_event_device *d)
+{
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
+ tc_clkevt2_shutdown(d);
+
+ /* By not making the gentime core emulate periodic mode on top
+ * of oneshot, we get lower overhead and improved accuracy.
+ */
+ clk_enable(tce.clk);
+
+ /* slow clock, count up to RC, then irq and restart */
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_WAVE |
+ ATMEL_TC_CMR_WAVESEL_UPRC,
+ tce.base + ATMEL_TC_CMR(tce.channel));
+ writel((32768 + HZ / 2) / HZ, tce.base + ATMEL_TC_RC(tce.channel));
+
+ /* Enable clock and interrupts on RC compare */
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel));
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tce.base + ATMEL_TC_CCR(tce.channel));
+
+ return 0;
+}
+
+static int tc_clkevt2_next_event(unsigned long delta,
+ struct clock_event_device *d)
+{
+ writel(delta, tce.base + ATMEL_TC_RC(tce.channel));
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tce.base + ATMEL_TC_CCR(tce.channel));
+
+ return 0;
+}
+
+static irqreturn_t tc_clkevt2_irq(int irq, void *handle)
+{
+ unsigned int sr;
+
+ sr = readl(tce.base + ATMEL_TC_SR(tce.channel));
+ if (sr & ATMEL_TC_CPCS) {
+ tce.clkevt.event_handler(&tce.clkevt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static void tc_clkevt2_suspend(struct clock_event_device *d)
+{
+ tce.cache.cmr = readl(tce.base + ATMEL_TC_CMR(tce.channel));
+ tce.cache.imr = readl(tce.base + ATMEL_TC_IMR(tce.channel));
+ tce.cache.rc = readl(tce.base + ATMEL_TC_RC(tce.channel));
+ tce.cache.clken = !!(readl(tce.base + ATMEL_TC_SR(tce.channel)) &
+ ATMEL_TC_CLKSTA);
+}
+
+static void tc_clkevt2_resume(struct clock_event_device *d)
+{
+ /* Restore registers for the channel, RA and RB are not used */
+ writel(tce.cache.cmr, tc.base + ATMEL_TC_CMR(tce.channel));
+ writel(tce.cache.rc, tc.base + ATMEL_TC_RC(tce.channel));
+ writel(0, tc.base + ATMEL_TC_RA(tce.channel));
+ writel(0, tc.base + ATMEL_TC_RB(tce.channel));
+ /* Disable all the interrupts */
+ writel(0xff, tc.base + ATMEL_TC_IDR(tce.channel));
+ /* Reenable interrupts that were enabled before suspending */
+ writel(tce.cache.imr, tc.base + ATMEL_TC_IER(tce.channel));
+
+ /* Start the clock if it was used */
+ if (tce.cache.clken)
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tc.base + ATMEL_TC_CCR(tce.channel));
+}
+
+static int __init tc_clkevt_register(struct device_node *node,
+ struct regmap *regmap, void __iomem *base,
+ int channel, int irq, int bits)
+{
+ int ret;
+
+ tce.regmap = regmap;
+ tce.base = base;
+ tce.channel = channel;
+ tce.irq = irq;
+
+ tce.slow_clk = of_clk_get_by_name(node->parent, "slow_clk");
+ if (IS_ERR(tce.slow_clk))
+ return PTR_ERR(tce.slow_clk);
+
+ ret = clk_prepare_enable(tce.slow_clk);
+ if (ret)
+ return ret;
+
+ tce.clk = tcb_clk_get(node, tce.channel);
+ if (IS_ERR(tce.clk)) {
+ ret = PTR_ERR(tce.clk);
+ goto err_slow;
+ }
+
+ snprintf(tce.name, sizeof(tce.name), "%s:%d",
+ kbasename(node->parent->full_name), channel);
+ tce.clkevt.cpumask = cpumask_of(0);
+ tce.clkevt.name = tce.name;
+ tce.clkevt.set_next_event = tc_clkevt2_next_event,
+ tce.clkevt.set_state_shutdown = tc_clkevt2_shutdown,
+ tce.clkevt.set_state_periodic = tc_clkevt2_set_periodic,
+ tce.clkevt.set_state_oneshot = tc_clkevt2_set_oneshot,
+ tce.clkevt.suspend = tc_clkevt2_suspend,
+ tce.clkevt.resume = tc_clkevt2_resume,
+
+ /* try to enable clk to avoid future errors in mode change */
+ ret = clk_prepare_enable(tce.clk);
+ if (ret)
+ goto err_slow;
+ clk_disable(tce.clk);
+
+ clockevents_config_and_register(&tce.clkevt, 32768, 1, BIT(bits) - 1);
+
+ ret = request_irq(tce.irq, tc_clkevt2_irq, IRQF_TIMER | IRQF_SHARED,
+ tce.clkevt.name, &tce);
+ if (ret)
+ goto err_clk;
+
+ tce.registered = true;
+
+ return 0;
+
+err_clk:
+ clk_unprepare(tce.clk);
+err_slow:
+ clk_disable_unprepare(tce.slow_clk);
+
+ return ret;
+}
+
+/*
+ * Clocksource and clockevent using the same channel(s)
+ */
+static u64 tc_get_cycles(struct clocksource *cs)
+{
+ u32 lower, upper;
+
+ do {
+ upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
+ lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
+ } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
+
+ return (upper << 16) | lower;
+}
+
+static u64 tc_get_cycles32(struct clocksource *cs)
+{
+ return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
+}
+
+static u64 notrace tc_sched_clock_read(void)
+{
+ return tc_get_cycles(&tc.clksrc);
+}
+
+static u64 notrace tc_sched_clock_read32(void)
+{
+ return tc_get_cycles32(&tc.clksrc);
+}
+
+static int tcb_clkevt_next_event(unsigned long delta,
+ struct clock_event_device *d)
+{
+ u32 old, next, cur;
+
+
+ old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
+ next = old + delta;
+ writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
+ cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
+
+ /* check whether the delta elapsed while setting the register */
+ if ((next < old && cur < old && cur > next) ||
+ (next > old && (cur < old || cur > next))) {
+ /*
+ * Clear the CPCS bit in the status register to avoid
+ * generating a spurious interrupt next time a valid
+ * timer event is configured.
+ */
+ old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
+ return -ETIME;
+ }
+
+ writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));
+
+ return 0;
+}
+
+static irqreturn_t tc_clkevt_irq(int irq, void *handle)
+{
+ unsigned int sr;
+
+ sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
+ if (sr & ATMEL_TC_CPCS) {
+ tc.clkevt.event_handler(&tc.clkevt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static int tcb_clkevt_oneshot(struct clock_event_device *dev)
+{
+ if (clockevent_state_oneshot(dev))
+ return 0;
+
+ /*
+ * Because both clockevent devices may share the same IRQ, we don't want
+ * the less likely one to stay requested
+ */
+ return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
+ tc.name, &tc);
+}
+
+static int tcb_clkevt_shutdown(struct clock_event_device *dev)
+{
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
+ if (tc.bits == 16)
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
+
+ if (!clockevent_state_detached(dev))
+ free_irq(tc.irq, &tc);
+
+ return 0;
+}
+
+static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
+ int mck_divisor_idx)
+{
+ /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
+ writel(mck_divisor_idx /* likely divide-by-8 */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP /* free-run */
+ | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */
+ | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
+ writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
+ writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
+
+ /* second channel: waveform mode, input TIOA */
+ writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
+ tc->base + ATMEL_TC_CMR(tc->channels[1]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
+
+ /* chain both channel, we assume the previous channel */
+ regmap_write(tc->regmap, ATMEL_TC_BMR,
+ ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
+ /* then reset all the timers */
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
+ int mck_divisor_idx)
+{
+ /* channel 0: waveform mode, input mclk/8 */
+ writel(mck_divisor_idx /* likely divide-by-8 */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
+
+ /* then reset all the timers */
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static void tc_clksrc_suspend(struct clocksource *cs)
+{
+ int i;
+
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
+ tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
+ tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
+ tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
+ tc.cache[i].clken = !!(readl(tc.base +
+ ATMEL_TC_SR(tc.channels[i])) &
+ ATMEL_TC_CLKSTA);
+ }
+
+ if (tc.bits == 16)
+ regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
+}
+
+static void tc_clksrc_resume(struct clocksource *cs)
+{
+ int i;
+
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
+ /* Restore registers for the channel, RA and RB are not used */
+ writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
+ writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
+ writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
+ writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
+ /* Disable all the interrupts */
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
+ /* Reenable interrupts that were enabled before suspending */
+ writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
+
+ /* Start the clock if it was used */
+ if (tc.cache[i].clken)
+ writel(ATMEL_TC_CCR_CLKEN, tc.base +
+ ATMEL_TC_CCR(tc.channels[i]));
+ }
+
+ /* in case of dual channel, chain channels */
+ if (tc.bits == 16)
+ regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
+ /* Finally, trigger all the channels*/
+ regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static int __init tcb_clksrc_register(struct device_node *node,
+ struct regmap *regmap, void __iomem *base,
+ int channel, int channel1, int irq,
+ int bits)
+{
+ u32 rate, divided_rate = 0;
+ int best_divisor_idx = -1;
+ int i, err = -1;
+ u64 (*tc_sched_clock)(void);
+
+ tc.regmap = regmap;
+ tc.base = base;
+ tc.channels[0] = channel;
+ tc.channels[1] = channel1;
+ tc.irq = irq;
+ tc.bits = bits;
+
+ tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
+ if (IS_ERR(tc.clk[0]))
+ return PTR_ERR(tc.clk[0]);
+ err = clk_prepare_enable(tc.clk[0]);
+ if (err) {
+ pr_debug("can't enable T0 clk\n");
+ goto err_clk;
+ }
+
+ /* How fast will we be counting? Pick something over 5 MHz. */
+ rate = (u32)clk_get_rate(tc.clk[0]);
+ for (i = 0; i < 5; i++) {
+ unsigned int divisor = atmel_tc_divisors[i];
+ unsigned int tmp;
+
+ if (!divisor)
+ continue;
+
+ tmp = rate / divisor;
+ pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
+ if (best_divisor_idx > 0) {
+ if (tmp < 5 * 1000 * 1000)
+ continue;
+ }
+ divided_rate = tmp;
+ best_divisor_idx = i;
+ }
+
+ if (tc.bits == 32) {
+ tc.clksrc.read = tc_get_cycles32;
+ tcb_setup_single_chan(&tc, best_divisor_idx);
+ tc_sched_clock = tc_sched_clock_read32;
+ snprintf(tc.name, sizeof(tc.name), "%s:%d",
+ kbasename(node->parent->full_name), tc.channels[0]);
+ } else {
+ tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
+ if (IS_ERR(tc.clk[1]))
+ goto err_disable_t0;
+
+ err = clk_prepare_enable(tc.clk[1]);
+ if (err) {
+ pr_debug("can't enable T1 clk\n");
+ goto err_clk1;
+ }
+ tc.clksrc.read = tc_get_cycles,
+ tcb_setup_dual_chan(&tc, best_divisor_idx);
+ tc_sched_clock = tc_sched_clock_read;
+ snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
+ kbasename(node->parent->full_name), tc.channels[0],
+ tc.channels[1]);
+ }
+
+ pr_debug("%s at %d.%03d MHz\n", tc.name,
+ divided_rate / 1000000,
+ ((divided_rate + 500000) % 1000000) / 1000);
+
+ tc.clksrc.name = tc.name;
+ tc.clksrc.suspend = tc_clksrc_suspend;
+ tc.clksrc.resume = tc_clksrc_resume;
+
+ err = clocksource_register_hz(&tc.clksrc, divided_rate);
+ if (err)
+ goto err_disable_t1;
+
+ sched_clock_register(tc_sched_clock, 32, divided_rate);
+
+ tc.registered = true;
+
+ /* Set up and register clockevents */
+ tc.clkevt.name = tc.name;
+ tc.clkevt.cpumask = cpumask_of(0);
+ tc.clkevt.set_next_event = tcb_clkevt_next_event;
+ tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
+ tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
+ clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
+ BIT(tc.bits) - 1);
+
+ return 0;
+
+err_disable_t1:
+ if (tc.bits == 16)
+ clk_disable_unprepare(tc.clk[1]);
+
+err_clk1:
+ if (tc.bits == 16)
+ clk_put(tc.clk[1]);
+
+err_disable_t0:
+ clk_disable_unprepare(tc.clk[0]);
+
+err_clk:
+ clk_put(tc.clk[0]);
+
+ pr_err("%s: unable to register clocksource/clockevent\n",
+ tc.clksrc.name);
+
+ return err;
+}
+
+static int __init tcb_clksrc_init(struct device_node *node)
+{
+ const struct of_device_id *match;
+ const struct atmel_tcb_info *tcb_info;
+ struct regmap *regmap;
+ void __iomem *tcb_base;
+ u32 channel;
+ int bits, irq, err, chan1 = -1;
+
+ if (tc.registered && tce.registered)
+ return -ENODEV;
+
+ /*
+ * The regmap has to be used to access registers that are shared
+ * between channels on the same TCB but we keep direct IO access for
+ * the counters to avoid the impact on performance
+ */
+ regmap = syscon_node_to_regmap(node->parent);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ tcb_base = of_iomap(node->parent, 0);
+ if (!tcb_base) {
+ pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);
+ return -ENXIO;
+ }
+
+ match = of_match_node(atmel_tcb_dt_ids, node->parent);
+ tcb_info = match->data;
+ bits = tcb_info->bits;
+
+ err = of_property_read_u32_index(node, "reg", 0, &channel);
+ if (err)
+ return err;
+
+ irq = tcb_irq_get(node, channel);
+ if (irq < 0)
+ return irq;
+
+ if (tc.registered)
+ return tc_clkevt_register(node, regmap, tcb_base, channel, irq,
+ bits);
+
+ if (bits == 16) {
+ of_property_read_u32_index(node, "reg", 1, &chan1);
+ if (chan1 == -1) {
+ if (tce.registered) {
+ pr_err("%s: clocksource needs two channels\n",
+ node->parent->full_name);
+ return -EINVAL;
+ } else {
+ return tc_clkevt_register(node, regmap,
+ tcb_base, channel,
+ irq, bits);
+ }
+ }
+ }
+
+ return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
+ bits);
+}
+CLOCKSOURCE_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer",
+ tcb_clksrc_init);


@@ -1,37 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:51 +0100
Subject: [PATCH 02/29] hrtimer: Correct blatantly wrong comment
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The protection of a hrtimer which runs its callback against migration to a
different CPU has nothing to do with hard interrupt context.
The protection against migration of a hrtimer running the expiry callback
is the pointer in the cpu_base which holds a pointer to the currently
running timer. This pointer is evaluated in the code which potentially
switches the timer base and makes sure it's kept on the CPU on which the
callback is running.
Reported-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1197,9 +1197,9 @@ static void __run_hrtimer(struct hrtimer
timer->is_rel = false;
/*
- * Because we run timers from hardirq context, there is no chance
- * they get migrated to another cpu, therefore its safe to unlock
- * the timer base.
+ * The timer is marked as running in the cpu base, so it is
+ * protected against migration to a different CPU even if the lock
+ * is dropped.
*/
raw_spin_unlock(&cpu_base->lock);
trace_hrtimer_expire_entry(timer, now);

View File

@ -1,32 +0,0 @@
From: Scott Wood <swood@redhat.com>
Date: Sun, 28 Jan 2018 14:22:19 -0600
Subject: [PATCH 2/3] iommu/amd: Don't use dev_data in irte_ga_set_affinity()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 01ee04badefd296eb7a4430497373be9b7b16783
search_dev_data() acquires a non-raw lock, which can't be done
from atomic context on PREEMPT_RT. There is no need to look at
dev_data because guest_mode should never be set if use_vapic is
not set.
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3877,10 +3877,8 @@ static void irte_ga_set_affinity(void *e
u8 vector, u32 dest_apicid)
{
struct irte_ga *irte = (struct irte_ga *) entry;
- struct iommu_dev_data *dev_data = search_dev_data(devid);
- if (!dev_data || !dev_data->use_vapic ||
- !irte->lo.fields_remap.guest_mode) {
+ if (!irte->lo.fields_remap.guest_mode) {
irte->hi.fields.vector = vector;
irte->lo.fields_remap.destination = dest_apicid;
modify_irte_ga(devid, index, irte, NULL);

View File

@ -1,98 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:34 +0100
Subject: [PATCH 02/10] iommu/amd: Turn dev_data_list into a lock less list
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 779da73273fc4c4c6f41579a95e4fb7880a1720e
alloc_dev_data() adds new items to dev_data_list and search_dev_data()
searches for items in this list. Both protect access to the list
with a spinlock.
There is no need to navigate back and forth within the list and there is
also no deleting of specific items. This qualifies the list to become a
lock-less list and, as part of this, the spinlock can be removed.
With this change the ordering of those items within the list is changed:
before the change new items were added to the end of the list, now they
are added to the front. I don't think it matters but wanted to mention
it.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 28 ++++++++++------------------
drivers/iommu/amd_iommu_types.h | 2 +-
2 files changed, 11 insertions(+), 19 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -83,8 +83,7 @@
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
/* List of all available dev_data structures */
-static LIST_HEAD(dev_data_list);
-static DEFINE_SPINLOCK(dev_data_list_lock);
+static LLIST_HEAD(dev_data_list);
LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
@@ -203,40 +202,33 @@ static struct dma_ops_domain* to_dma_ops
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
- unsigned long flags;
dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
if (!dev_data)
return NULL;
dev_data->devid = devid;
-
- spin_lock_irqsave(&dev_data_list_lock, flags);
- list_add_tail(&dev_data->dev_data_list, &dev_data_list);
- spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
ratelimit_default_init(&dev_data->rs);
+ llist_add(&dev_data->dev_data_list, &dev_data_list);
return dev_data;
}
static struct iommu_dev_data *search_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
- unsigned long flags;
+ struct llist_node *node;
+
+ if (llist_empty(&dev_data_list))
+ return NULL;
- spin_lock_irqsave(&dev_data_list_lock, flags);
- list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
+ node = dev_data_list.first;
+ llist_for_each_entry(dev_data, node, dev_data_list) {
if (dev_data->devid == devid)
- goto out_unlock;
+ return dev_data;
}
- dev_data = NULL;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
- return dev_data;
+ return NULL;
}
static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -627,7 +627,7 @@ struct devid_map {
*/
struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
- struct list_head dev_data_list; /* For global dev_data_list */
+ struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
u16 devid; /* PCI Device ID */
u16 alias; /* Alias Device ID */
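
As a side note on the API this patch switches to: llist is the kernel's lock-less singly linked list, suitable here because entries are only pushed and looked up, never removed. A minimal usage sketch follows; everything except the llist_* calls (the struct, field and function names) is made up for illustration and does not appear in the patch.

#include <linux/llist.h>
#include <linux/slab.h>

struct foo {
	int id;
	struct llist_node node;
};

static LLIST_HEAD(foo_list);

static void foo_add(int id)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return;
	f->id = id;
	/* Lock-less push to the front of the list. */
	llist_add(&f->node, &foo_list);
}

static struct foo *foo_find(int id)
{
	struct foo *f;

	/* Iteration is safe as long as entries are never removed. */
	llist_for_each_entry(f, foo_list.first, node) {
		if (f->id == id)
			return f;
	}
	return NULL;
}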

View File

@ -1,53 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 22 Dec 2017 15:51:13 +0100
Subject: [PATCH 2/4] nohz: Prevent erroneous tick stop invocations
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.8-rt9.tar.xz
The conditions in irq_exit() to invoke tick_nohz_irq_exit() are:
if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu))
This is too permissive in various aspects:
1) If need_resched() is set, then the tick cannot be stopped whether
the CPU is idle or in nohz full mode.
2) If need_resched() is not set, but softirqs are pending then this is an
indication that the softirq code punted and delegated the execution to
softirqd. need_resched() is not true because the current interrupted
task takes precedence over softirqd.
Invoking tick_nohz_irq_exit() in these cases can cause an endless loop of
timer interrupts because the timer wheel contains an expired timer, but
softirqs are not yet executed. So it returns an immediate expiry request,
which causes the timer to fire immediately again. Lather, rinse and
repeat....
Prevent that by tightening the conditions: only allow the invocation when the
CPU is idle or in nohz full mode and neither need_resched() nor
local_softirq_pending() is set.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/softirq.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 4e09821f9d9e..6d260b1229a1 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -381,7 +381,8 @@ static inline void tick_irq_exit(void)
int cpu = smp_processor_id();
/* Make sure that timer wheel updates are propagated */
- if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
+ if ((idle_cpu(cpu) || tick_nohz_full_cpu(cpu)) &&
+ !need_resched() && !local_softirq_pending()) {
if (!in_interrupt())
tick_nohz_irq_exit();
}
--
2.15.1

View File

@ -1,46 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 22:07:35 -0500
Subject: [PATCH 02/17] now lock_parent() can't run into killed dentry
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 65d8eb5a8f5480756105173de147ef5d60163e2f
all remaining callers hold either a reference or ->i_lock
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -593,8 +593,6 @@ static inline struct dentry *lock_parent
struct dentry *parent = dentry->d_parent;
if (IS_ROOT(dentry))
return NULL;
- if (unlikely(dentry->d_lockref.count < 0))
- return NULL;
if (likely(spin_trylock(&parent->d_lock)))
return parent;
rcu_read_lock();
@@ -614,16 +612,11 @@ static inline struct dentry *lock_parent
spin_unlock(&parent->d_lock);
goto again;
}
- if (parent != dentry) {
+ rcu_read_unlock();
+ if (parent != dentry)
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (unlikely(dentry->d_lockref.count < 0)) {
- spin_unlock(&parent->d_lock);
- parent = NULL;
- }
- } else {
+ else
parent = NULL;
- }
- rcu_read_unlock();
return parent;
}

View File

@ -1,24 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:36 -0600
Subject: [PATCH 02/37] tracing: Add Documentation for log2 modifier
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Add a line for the log2 modifier, to keep it aligned with
tracing/README.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 1 +
1 file changed, 1 insertion(+)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -73,6 +73,7 @@
.sym-offset display an address as a symbol and offset
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
+ .log2 display log2 value rather than raw number
Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some

View File

@ -1,50 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 16 May 2018 09:36:44 -0400
Subject: [PATCH 2/5] tracing: Add field parsing hist error for hist triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
From: Tom Zanussi <tom.zanussi@linux.intel.com>
[ commit 5ec432d7bf9dd3b4a2b84f8974e3adb71f45fb1d ]
If the user specifies a nonexistent field for a hist trigger, the
current code correctly flags that as an error, but doesn't tell the
user what happened.
Fix this by invoking hist_err() with an appropriate message when
nonexistent fields are specified.
Before:
# echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
-su: echo: write error: Invalid argument
# cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
After:
# echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
-su: echo: write error: Invalid argument
# cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
ERROR: Couldn't find field: pid
Last command: keys=pid:ts0=common_timestamp.usecs
Link: http://lkml.kernel.org/r/fdc8746969d16906120f162b99dd71c741e0b62c.1524790601.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2482,6 +2482,7 @@ parse_field(struct hist_trigger_data *hi
else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
+ hist_err("Couldn't find field: ", field_name);
field = ERR_PTR(-EINVAL);
goto out;
}

View File

@ -1,189 +0,0 @@
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 22 Sep 2017 14:58:16 -0500
Subject: [PATCH 02/42] tracing: Reverse the order of trace_types_lock and
event_mutex
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
In order to make future changes where we need to call
tracing_set_clock() from within an event command, the order of
trace_types_lock and event_mutex must be reversed, as the event command
will hold event_mutex and the trace_types_lock is taken from within
tracing_set_clock().
Requested-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace.c | 5 +++++
kernel/trace/trace_events.c | 31 +++++++++++++++----------------
2 files changed, 20 insertions(+), 16 deletions(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7671,6 +7671,7 @@ static int instance_mkdir(const char *na
struct trace_array *tr;
int ret;
+ mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
ret = -EEXIST;
@@ -7726,6 +7727,7 @@ static int instance_mkdir(const char *na
list_add(&tr->list, &ftrace_trace_arrays);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
return 0;
@@ -7737,6 +7739,7 @@ static int instance_mkdir(const char *na
out_unlock:
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
return ret;
@@ -7749,6 +7752,7 @@ static int instance_rmdir(const char *na
int ret;
int i;
+ mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
ret = -ENODEV;
@@ -7794,6 +7798,7 @@ static int instance_rmdir(const char *na
out_unlock:
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1406,8 +1406,8 @@ static int subsystem_open(struct inode *
return -ENODEV;
/* Make sure the system still exists */
- mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
list_for_each_entry(dir, &tr->systems, list) {
if (dir == inode->i_private) {
@@ -1421,8 +1421,8 @@ static int subsystem_open(struct inode *
}
}
exit_loop:
- mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
if (!system)
return -ENODEV;
@@ -2308,15 +2308,15 @@ static void __add_event_to_tracers(struc
int trace_add_event_call(struct trace_event_call *call)
{
int ret;
- mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
ret = __register_event(call, NULL);
if (ret >= 0)
__add_event_to_tracers(call);
- mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
@@ -2370,13 +2370,13 @@ int trace_remove_event_call(struct trace
{
int ret;
- mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
down_write(&trace_event_sem);
ret = probe_remove_event_call(call);
up_write(&trace_event_sem);
- mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
@@ -2438,8 +2438,8 @@ static int trace_module_notify(struct no
{
struct module *mod = data;
- mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
switch (val) {
case MODULE_STATE_COMING:
trace_module_add_events(mod);
@@ -2448,8 +2448,8 @@ static int trace_module_notify(struct no
trace_module_remove_events(mod);
break;
}
- mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
return 0;
}
@@ -2964,24 +2964,24 @@ create_event_toplevel_files(struct dentr
* creates the event hierachry in the @parent/events directory.
*
* Returns 0 on success.
+ *
+ * Must be called with event_mutex held.
*/
int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
{
int ret;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
ret = create_event_toplevel_files(parent, tr);
if (ret)
- goto out_unlock;
+ goto out;
down_write(&trace_event_sem);
__trace_add_event_dirs(tr);
up_write(&trace_event_sem);
- out_unlock:
- mutex_unlock(&event_mutex);
-
+ out:
return ret;
}
@@ -3010,9 +3010,10 @@ early_event_add_tracer(struct dentry *pa
return ret;
}
+/* Must be called with event_mutex held */
int event_trace_del_tracer(struct trace_array *tr)
{
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
/* Disable any event triggers and associated soft-disabled events */
clear_event_triggers(tr);
@@ -3033,8 +3034,6 @@ int event_trace_del_tracer(struct trace_
tr->event_dir = NULL;
- mutex_unlock(&event_mutex);
-
return 0;
}
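
In short, the patch establishes a fixed nesting order between the two locks: event_mutex on the outside, trace_types_lock on the inside, so that an event command already holding event_mutex may safely end up in tracing_set_clock(). A minimal sketch of the resulting rule (illustrative only, not code taken from the patch):

/*
 * Lock ordering after this change:
 *   event_mutex (outer)
 *     trace_types_lock (inner)
 */
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
/* ... modify tracer / event state ... */
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);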

View File

@ -1,83 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 7 May 2018 17:09:42 +0200
Subject: [PATCH] userns: use refcount_t for reference counting instead
atomic_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The refcount_t type and its corresponding API should be used instead of atomic_t
when the variable is used as a reference counter. This helps avoid accidental
refcount overflows that might lead to use-after-free situations.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/sched/user.h | 5 +++--
kernel/user.c | 8 ++++----
2 files changed, 7 insertions(+), 6 deletions(-)
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/ratelimit.h>
struct key;
@@ -12,7 +13,7 @@ struct key;
* Some day this will be a full-fledged user tracking system..
*/
struct user_struct {
- atomic_t __count; /* reference count */
+ refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
extern struct user_struct * alloc_uid(kuid_t);
static inline struct user_struct *get_uid(struct user_struct *u)
{
- atomic_inc(&u->__count);
+ refcount_inc(&u->__count);
return u;
}
extern void free_uid(struct user_struct *);
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
/* root_user.__count is 1, for init task cred */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = REFCOUNT_INIT(1),
.processes = ATOMIC_INIT(1),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find
hlist_for_each_entry(user, hashent, uidhash_node) {
if (uid_eq(user->uid, uid)) {
- atomic_inc(&user->__count);
+ refcount_inc(&user->__count);
return user;
}
}
@@ -170,7 +170,7 @@ void free_uid(struct user_struct *up)
return;
local_irq_save(flags);
- if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock(&up->__count, &uidhash_lock))
free_user(up, flags);
else
local_irq_restore(flags);
@@ -191,7 +191,7 @@ struct user_struct *alloc_uid(kuid_t uid
goto out_unlock;
new->uid = uid;
- atomic_set(&new->__count, 1);
+ refcount_set(&new->__count, 1);
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
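
For readers unfamiliar with the API this patch switches to, a minimal sketch of the refcount_t ownership pattern follows; the struct and function names are illustrative and do not appear in the patch.

#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
	refcount_t ref;
	/* ... payload ... */
};

static struct obj *obj_alloc(void)
{
	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (o)
		refcount_set(&o->ref, 1);	/* initial reference */
	return o;
}

static struct obj *obj_get(struct obj *o)
{
	refcount_inc(&o->ref);	/* WARNs instead of silently overflowing */
	return o;
}

static void obj_put(struct obj *o)
{
	if (refcount_dec_and_test(&o->ref))
		kfree(o);
}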

View File

@ -1,30 +0,0 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:40 +0200
Subject: [PATCH 3/6] clocksource/drivers: atmel-pit: make option silent
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
To conform with the other options, make the ATMEL_PIT option silent so it
can be selected from the platform.
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -381,8 +381,11 @@ config ARMV7M_SYSTICK
This options enables support for the ARMv7M system timer unit
config ATMEL_PIT
+ bool "Microchip ARM Periodic Interval Timer (PIT)" if COMPILE_TEST
select TIMER_OF if OF
- def_bool SOC_AT91SAM9 || SOC_SAMA5
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated PIT in Microchip ARM SoCs.
config ATMEL_ST
bool "Atmel ST timer support" if COMPILE_TEST

View File

@ -1,43 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:52 +0100
Subject: [PATCH 03/29] hrtimer: Fix kerneldoc for struct hrtimer_cpu_base
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The sequence '/**' marks the start of a struct description. Add the
missing second asterisk. While at it, adapt the ordering of the struct
members to the struct definition and document the purpose of
expires_next more precisely.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -144,7 +144,7 @@ enum hrtimer_base_type {
HRTIMER_MAX_CLOCK_BASES,
};
-/*
+/**
* struct hrtimer_cpu_base - the per cpu clock bases
* @lock: lock protecting the base and associated clock bases
* and timers
@@ -153,12 +153,12 @@ enum hrtimer_base_type {
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
* @clock_was_set_seq: Sequence counter of clock was set events
- * @expires_next: absolute time of the next event which was scheduled
- * via clock_set_next_event()
- * @next_timer: Pointer to the first expiring timer
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hres_active: State of high resolution mode
* @hang_detected: The last hrtimer interrupt detected a hang
+ * @expires_next: absolute time of the next event, is required for remote
+ * hrtimer enqueue
+ * @next_timer: Pointer to the first expiring timer
* @nr_events: Total number of hrtimer interrupt events
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs

View File

@ -1,117 +0,0 @@
From: Scott Wood <swood@redhat.com>
Date: Wed, 14 Feb 2018 17:36:28 -0600
Subject: [PATCH 3/3] iommu/amd: Avoid locking get_irq_table() from atomic
context
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit df42a04b15f19a842393dc98a84cbc52b1f8ed49
get_irq_table() previously acquired amd_iommu_devtable_lock which is not
a raw lock, and thus cannot be acquired from atomic context on
PREEMPT_RT. Many calls to modify_irte*() come from atomic context due to
the IRQ desc->lock, as does amd_iommu_update_ga() due to the preemption
disabling in vcpu_load/put().
The only difference between calling get_irq_table() and reading from
irq_lookup_table[] directly, other than the lock acquisition and
amd_iommu_rlookup_table[] check, is if the table entry is unpopulated,
which should never happen when looking up a devid that came from an
irq_2_irte struct, as get_irq_table() would have already been called on
that devid during irq_remapping_alloc().
The lock acquisition is not needed in these cases because entries in
irq_lookup_table[] never change once non-NULL -- nor would the
amd_iommu_devtable_lock usage in get_irq_table() provide meaningful
protection if they did, since it's released before using the looked up
table in the get_irq_table() caller.
Rename the old get_irq_table() to alloc_irq_table(), and create a new
lockless get_irq_table() to be used in non-allocating contexts that WARNs
if it doesn't find what it's looking for.
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3595,7 +3595,22 @@ static void set_dte_irq_entry(u16 devid,
amd_iommu_dev_table[devid].data[2] = dte;
}
-static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *get_irq_table(u16 devid)
+{
+ struct irq_remap_table *table;
+
+ if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
+ "%s: no iommu for devid %x\n", __func__, devid))
+ return NULL;
+
+ table = irq_lookup_table[devid];
+ if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
+ return NULL;
+
+ return table;
+}
+
+static struct irq_remap_table *alloc_irq_table(u16 devid, bool ioapic)
{
struct irq_remap_table *table = NULL;
struct amd_iommu *iommu;
@@ -3682,7 +3697,7 @@ static int alloc_irq_index(u16 devid, in
if (!iommu)
return -ENODEV;
- table = get_irq_table(devid, false);
+ table = alloc_irq_table(devid, false);
if (!table)
return -ENODEV;
@@ -3733,7 +3748,7 @@ static int modify_irte_ga(u16 devid, int
if (iommu == NULL)
return -EINVAL;
- table = get_irq_table(devid, false);
+ table = get_irq_table(devid);
if (!table)
return -ENOMEM;
@@ -3766,7 +3781,7 @@ static int modify_irte(u16 devid, int in
if (iommu == NULL)
return -EINVAL;
- table = get_irq_table(devid, false);
+ table = get_irq_table(devid);
if (!table)
return -ENOMEM;
@@ -3790,7 +3805,7 @@ static void free_irte(u16 devid, int ind
if (iommu == NULL)
return;
- table = get_irq_table(devid, false);
+ table = get_irq_table(devid);
if (!table)
return;
@@ -4108,7 +4123,7 @@ static int irq_remapping_alloc(struct ir
return ret;
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
- if (get_irq_table(devid, true))
+ if (alloc_irq_table(devid, true))
index = info->ioapic_pin;
else
ret = -ENOMEM;
@@ -4391,7 +4406,7 @@ int amd_iommu_update_ga(int cpu, bool is
if (!iommu)
return -ENODEV;
- irt = get_irq_table(devid, false);
+ irt = get_irq_table(devid);
if (!irt)
return -ENODEV;

View File

@ -1,63 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:35 +0100
Subject: [PATCH 03/10] iommu/amd: Split domain id out of
amd_iommu_devtable_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 2bc00180890427dcc092b2f2b0d03c904bcade29
domain_id_alloc() and domain_id_free() are used for ID management. Those
two functions share a bitmap (amd_iommu_pd_alloc_bitmap) and set/clear
bits based on ID allocation. There is no need to share this with
amd_iommu_devtable_lock; a dedicated lock can be used for this operation.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -81,6 +81,7 @@
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
static LLIST_HEAD(dev_data_list);
@@ -1599,29 +1600,26 @@ static void del_domain_from_list(struct
static u16 domain_id_alloc(void)
{
- unsigned long flags;
int id;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock(&pd_bitmap_lock);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock(&pd_bitmap_lock);
return id;
}
static void domain_id_free(int id)
{
- unsigned long flags;
-
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock(&pd_bitmap_lock);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock(&pd_bitmap_lock);
}
#define DEFINE_FREE_PT_FN(LVL, FN) \

View File

@ -1,365 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 7 May 2018 17:42:52 +0200
Subject: [PATCH] md: raid5: use refcount_t for reference counting instead
atomic_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The refcount_t type and its corresponding API should be used instead of atomic_t
when the variable is used as a reference counter. This helps avoid accidental
refcount overflows that might lead to use-after-free situations.
Most changes are 1:1 replacements except for
BUG_ON(atomic_inc_return(&sh->count) != 1);
which has been turned into
refcount_inc(&sh->count);
BUG_ON(refcount_read(&sh->count) != 1);
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/raid5-cache.c | 8 ++---
drivers/md/raid5-ppl.c | 2 -
drivers/md/raid5.c | 67 +++++++++++++++++++++++------------------------
drivers/md/raid5.h | 4 +-
4 files changed, 41 insertions(+), 40 deletions(-)
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1049,7 +1049,7 @@ int r5l_write_stripe(struct r5l_log *log
* don't delay.
*/
clear_bit(STRIPE_DELAYED, &sh->state);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
mutex_lock(&log->io_mutex);
/* meta + data */
@@ -1388,7 +1388,7 @@ static void r5c_flush_stripe(struct r5co
lockdep_assert_held(&conf->device_lock);
list_del_init(&sh->lru);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
set_bit(STRIPE_HANDLE, &sh->state);
atomic_inc(&conf->active_stripes);
@@ -1491,7 +1491,7 @@ static void r5c_do_reclaim(struct r5conf
*/
if (!list_empty(&sh->lru) &&
!test_bit(STRIPE_HANDLE, &sh->state) &&
- atomic_read(&sh->count) == 0) {
+ refcount_read(&sh->count) == 0) {
r5c_flush_stripe(conf, sh);
if (count++ >= R5C_RECLAIM_STRIPE_GROUP)
break;
@@ -2912,7 +2912,7 @@ int r5c_cache_data(struct r5l_log *log,
* don't delay.
*/
clear_bit(STRIPE_DELAYED, &sh->state);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
mutex_lock(&log->io_mutex);
/* meta + data */
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -388,7 +388,7 @@ int ppl_write_stripe(struct r5conf *conf
set_bit(STRIPE_LOG_TRAPPED, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (ppl_log_stripe(log, sh)) {
spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -306,7 +306,7 @@ static void do_release_stripe(struct r5c
static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
struct list_head *temp_inactive_list)
{
- if (atomic_dec_and_test(&sh->count))
+ if (refcount_dec_and_test(&sh->count))
do_release_stripe(conf, sh, temp_inactive_list);
}
@@ -398,7 +398,7 @@ void raid5_release_stripe(struct stripe_
/* Avoid release_list until the last reference.
*/
- if (atomic_add_unless(&sh->count, -1, 1))
+ if (refcount_dec_not_one(&sh->count))
return;
if (unlikely(!conf->mddev->thread) ||
@@ -411,7 +411,7 @@ void raid5_release_stripe(struct stripe_
slow_path:
local_irq_save(flags);
/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
- if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
+ if (refcount_dec_and_lock(&sh->count, &conf->device_lock)) {
INIT_LIST_HEAD(&list);
hash = sh->hash_lock_index;
do_release_stripe(conf, sh, &list);
@@ -501,7 +501,7 @@ static void init_stripe(struct stripe_he
struct r5conf *conf = sh->raid_conf;
int i, seq;
- BUG_ON(atomic_read(&sh->count) != 0);
+ BUG_ON(refcount_read(&sh->count) != 0);
BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
BUG_ON(stripe_operations_active(sh));
BUG_ON(sh->batch_head);
@@ -678,11 +678,11 @@ raid5_get_active_stripe(struct r5conf *c
&conf->cache_state);
} else {
init_stripe(sh, sector, previous);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
}
- } else if (!atomic_inc_not_zero(&sh->count)) {
+ } else if (!refcount_inc_not_zero(&sh->count)) {
spin_lock(&conf->device_lock);
- if (!atomic_read(&sh->count)) {
+ if (!refcount_read(&sh->count)) {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&sh->lru) &&
@@ -698,7 +698,7 @@ raid5_get_active_stripe(struct r5conf *c
sh->group = NULL;
}
}
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
spin_unlock(&conf->device_lock);
}
} while (sh == NULL);
@@ -760,9 +760,9 @@ static void stripe_add_to_batch_list(str
hash = stripe_hash_locks_hash(head_sector);
spin_lock_irq(conf->hash_locks + hash);
head = __find_stripe(conf, head_sector, conf->generation);
- if (head && !atomic_inc_not_zero(&head->count)) {
+ if (head && !refcount_inc_not_zero(&head->count)) {
spin_lock(&conf->device_lock);
- if (!atomic_read(&head->count)) {
+ if (!refcount_read(&head->count)) {
if (!test_bit(STRIPE_HANDLE, &head->state))
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&head->lru) &&
@@ -778,7 +778,7 @@ static void stripe_add_to_batch_list(str
head->group = NULL;
}
}
- atomic_inc(&head->count);
+ refcount_inc(&head->count);
spin_unlock(&conf->device_lock);
}
spin_unlock_irq(conf->hash_locks + hash);
@@ -847,7 +847,7 @@ static void stripe_add_to_batch_list(str
sh->batch_head->bm_seq = seq;
}
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
unlock_out:
unlock_two_stripes(head, sh);
out:
@@ -1110,9 +1110,9 @@ static void ops_run_io(struct stripe_hea
pr_debug("%s: for %llu schedule op %d on disc %d\n",
__func__, (unsigned long long)sh->sector,
bi->bi_opf, i);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (sh != head_sh)
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
if (use_new_offset(conf, sh))
bi->bi_iter.bi_sector = (sh->sector
+ rdev->new_data_offset);
@@ -1174,9 +1174,9 @@ static void ops_run_io(struct stripe_hea
"replacement disc %d\n",
__func__, (unsigned long long)sh->sector,
rbi->bi_opf, i);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (sh != head_sh)
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
if (use_new_offset(conf, sh))
rbi->bi_iter.bi_sector = (sh->sector
+ rrdev->new_data_offset);
@@ -1354,7 +1354,7 @@ static void ops_run_biofill(struct strip
}
}
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
async_trigger_callback(&submit);
}
@@ -1432,7 +1432,7 @@ ops_run_compute5(struct stripe_head *sh,
if (i != target)
xor_srcs[count++] = sh->dev[i].page;
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
@@ -1521,7 +1521,7 @@ ops_run_compute6_1(struct stripe_head *s
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
dest = tgt->page;
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (target == qd_idx) {
count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
@@ -1596,7 +1596,7 @@ ops_run_compute6_2(struct stripe_head *s
pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
__func__, (unsigned long long)sh->sector, faila, failb);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (failb == syndrome_disks+1) {
/* Q disk is one of the missing disks */
@@ -1867,7 +1867,7 @@ ops_run_reconstruct5(struct stripe_head
break;
}
if (i >= sh->disks) {
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
set_bit(R5_Discard, &sh->dev[pd_idx].flags);
ops_complete_reconstruct(sh);
return;
@@ -1908,7 +1908,7 @@ ops_run_reconstruct5(struct stripe_head
flags = ASYNC_TX_ACK |
(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh,
to_addr_conv(sh, percpu, j));
} else {
@@ -1950,7 +1950,7 @@ ops_run_reconstruct6(struct stripe_head
break;
}
if (i >= sh->disks) {
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
ops_complete_reconstruct(sh);
@@ -1974,7 +1974,7 @@ ops_run_reconstruct6(struct stripe_head
struct stripe_head, batch_list) == head_sh;
if (last_stripe) {
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
head_sh, to_addr_conv(sh, percpu, j));
} else
@@ -2031,7 +2031,7 @@ static void ops_run_check_p(struct strip
tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
&sh->ops.zero_sum_result, &submit);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
tx = async_trigger_callback(&submit);
}
@@ -2050,7 +2050,7 @@ static void ops_run_check_pq(struct stri
if (!checkp)
srcs[count] = NULL;
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
sh, to_addr_conv(sh, percpu, 0));
async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
@@ -2150,7 +2150,7 @@ static struct stripe_head *alloc_stripe(
INIT_LIST_HEAD(&sh->lru);
INIT_LIST_HEAD(&sh->r5c);
INIT_LIST_HEAD(&sh->log_list);
- atomic_set(&sh->count, 1);
+ refcount_set(&sh->count, 1);
sh->raid_conf = conf;
sh->log_start = MaxSector;
for (i = 0; i < disks; i++) {
@@ -2451,7 +2451,7 @@ static int drop_one_stripe(struct r5conf
spin_unlock_irq(conf->hash_locks + hash);
if (!sh)
return 0;
- BUG_ON(atomic_read(&sh->count));
+ BUG_ON(refcount_read(&sh->count));
shrink_buffers(sh);
free_stripe(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
@@ -2483,7 +2483,7 @@ static void raid5_end_read_request(struc
break;
pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
- (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ (unsigned long long)sh->sector, i, refcount_read(&sh->count),
bi->bi_status);
if (i == disks) {
bio_reset(bi);
@@ -2620,7 +2620,7 @@ static void raid5_end_write_request(stru
}
}
pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
- (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ (unsigned long long)sh->sector, i, refcount_read(&sh->count),
bi->bi_status);
if (i == disks) {
bio_reset(bi);
@@ -4687,7 +4687,7 @@ static void handle_stripe(struct stripe_
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
(unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
+ refcount_read(&sh->count), sh->pd_idx, sh->qd_idx,
sh->check_state, sh->reconstruct_state);
analyse_stripe(sh, &s);
@@ -5062,7 +5062,7 @@ static void activate_bit_delay(struct r5
struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
int hash;
list_del_init(&sh->lru);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
hash = sh->hash_lock_index;
__release_stripe(conf, sh, &temp_inactive_list[hash]);
}
@@ -5387,7 +5387,8 @@ static struct stripe_head *__get_priorit
sh->group = NULL;
}
list_del_init(&sh->lru);
- BUG_ON(atomic_inc_return(&sh->count) != 1);
+ refcount_inc(&sh->count);
+ BUG_ON(refcount_read(&sh->count) != 1);
return sh;
}
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -4,7 +4,7 @@
#include <linux/raid/xor.h>
#include <linux/dmaengine.h>
-
+#include <linux/refcount.h>
/*
*
* Each stripe contains one buffer per device. Each buffer can be in
@@ -208,7 +208,7 @@ struct stripe_head {
short ddf_layout;/* use DDF ordering to calculate Q */
short hash_lock_index;
unsigned long state; /* state flags */
- atomic_t count; /* nr of active thread/requests */
+ refcount_t count; /* nr of active thread/requests */
int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
int overwrite_disks; /* total overwrite disks in stripe,

View File

@ -1,50 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 22:11:34 -0500
Subject: [PATCH 03/17] split the slow part of lock_parent() off
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 8b987a46a1e0e93d4cb4babea06ea274e2e2b658
Turn the "trylock failed" part into uninlined __lock_parent().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -588,13 +588,9 @@ static void __dentry_kill(struct dentry
dentry_free(dentry);
}
-static inline struct dentry *lock_parent(struct dentry *dentry)
+static struct dentry *__lock_parent(struct dentry *dentry)
{
- struct dentry *parent = dentry->d_parent;
- if (IS_ROOT(dentry))
- return NULL;
- if (likely(spin_trylock(&parent->d_lock)))
- return parent;
+ struct dentry *parent;
rcu_read_lock();
spin_unlock(&dentry->d_lock);
again:
@@ -620,6 +616,16 @@ static inline struct dentry *lock_parent
return parent;
}
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+ struct dentry *parent = dentry->d_parent;
+ if (IS_ROOT(dentry))
+ return NULL;
+ if (likely(spin_trylock(&parent->d_lock)))
+ return parent;
+ return __lock_parent(dentry);
+}
+
/*
* Finish off a dentry we've decided to kill.
* dentry->d_lock must be held, returns with it unlocked.

View File

@ -1,115 +0,0 @@
From: Vedang Patel <vedang.patel@intel.com>
Date: Mon, 15 Jan 2018 20:51:37 -0600
Subject: [PATCH 03/37] tracing: Add support to detect and avoid duplicates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
A duplicate in the tracing_map hash table is when 2 different entries
have the same key and, as a result, the key_hash. This is possible due
to a race condition in the algorithm. This race condition is inherent to
the algorithm and not a bug. This was fine because, until now, we were
only interested in the sum of all the values related to a particular
key (the duplicates are dealt with in tracing_map_sort_entries()). But,
with the inclusion of variables[1], we are interested in individual
values. So, it will not be clear what value to choose when
there are duplicates. So, the duplicates need to be removed.
The duplicates can occur in the code in the following scenarios:
- A thread is in the process of adding a new element. It has
successfully executed cmpxchg() and inserted the key. But, it is still
not done acquiring the tracing_map_elt struct, populating it and storing
the pointer to the struct in the value field of the tracing_map hash table.
If another thread comes in at this time and wants to add an element with
the same key, it will not see the current element and will add a new one.
- There are multiple threads trying to execute cmpxchg() at the same time;
one of the threads will succeed and the others will fail. The ones which
fail will go ahead, increment 'idx' and add a new element there, creating
a duplicate.
This patch detects and avoids the first condition by asking the thread
which detects the duplicate to loop one more time. There is also a
possibility of an infinite loop if the thread which is trying to insert
goes to sleep indefinitely while the one which is trying to insert a new
element detects a duplicate. That is why the thread loops for at most
map_size iterations before returning NULL.
The second scenario is avoided by preventing the threads which failed
cmpxchg() from incrementing idx. This way, they will loop
around and check whether the thread which succeeded in executing cmpxchg()
used the same key.
[1] http://lkml.kernel.org/r/cover.1498510759.git.tom.zanussi@linux.intel.com
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.c | 41 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 36 insertions(+), 5 deletions(-)
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -414,7 +414,9 @@ static inline struct tracing_map_elt *
__tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
{
u32 idx, key_hash, test_key;
+ int dup_try = 0;
struct tracing_map_entry *entry;
+ struct tracing_map_elt *val;
key_hash = jhash(key, map->key_size, 0);
if (key_hash == 0)
@@ -426,11 +428,33 @@ static inline struct tracing_map_elt *
entry = TRACING_MAP_ENTRY(map->map, idx);
test_key = entry->key;
- if (test_key && test_key == key_hash && entry->val &&
- keys_match(key, entry->val->key, map->key_size)) {
- if (!lookup_only)
- atomic64_inc(&map->hits);
- return entry->val;
+ if (test_key && test_key == key_hash) {
+ val = READ_ONCE(entry->val);
+ if (val &&
+ keys_match(key, val->key, map->key_size)) {
+ if (!lookup_only)
+ atomic64_inc(&map->hits);
+ return val;
+ } else if (unlikely(!val)) {
+ /*
+ * The key is present. But, val (pointer to elt
+ * struct) is still NULL. which means some other
+ * thread is in the process of inserting an
+ * element.
+ *
+ * On top of that, its key_hash is the same as the
+ * one being inserted right now. So, it's
+ * possible that the element has the same
+ * key as well.
+ */
+
+ dup_try++;
+ if (dup_try > map->map_size) {
+ atomic64_inc(&map->drops);
+ break;
+ }
+ continue;
+ }
}
if (!test_key) {
@@ -452,6 +476,13 @@ static inline struct tracing_map_elt *
atomic64_inc(&map->hits);
return entry->val;
+ } else {
+ /*
+ * cmpxchg() failed. Loop around once
+ * more to check what key was inserted.
+ */
+ dup_try++;
+ continue;
}
}
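
To make the race described in the changelog concrete, the following sketch (entirely illustrative, not taken from tracing_map.c) shows the two-step insert that opens the window which the dup_try loop has to handle:

#include <linux/atomic.h>
#include <linux/types.h>

struct entry {
	u32 key;	/* claimed atomically first        */
	void *val;	/* pointer to the element, published later */
};

static void *insert(struct entry *e, u32 key_hash, void *(*alloc_elt)(void))
{
	/* Step 1: claim the slot. Losers of the race must retry elsewhere. */
	if (cmpxchg(&e->key, 0, key_hash) != 0)
		return NULL;

	/*
	 * Between step 1 and step 2, a concurrent reader can observe
	 * e->key == key_hash while e->val is still NULL -- exactly the
	 * window the patch's dup_try loop waits out.
	 */

	/* Step 2: allocate and publish the element. */
	WRITE_ONCE(e->val, alloc_elt());
	return e->val;
}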

View File

@ -1,66 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 16 May 2018 09:36:45 -0400
Subject: [PATCH 3/5] tracing: Restore proper field flag printing when
displaying triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
From: Tom Zanussi <tom.zanussi@linux.intel.com>
[ commit 608940dabe1bd2ce4c97524004ec86637cf80f2c ]
The flag-printing code used when displaying hist triggers somehow got
dropped during refactoring of the inter-event patchset. This restores
it.
Below are a couple of examples: in the first case, .usecs wasn't being
displayed properly for common_timestamp, and the second illustrates
the same for other flags such as .execname.
Before:
# echo 'hist:key=common_pid.execname:val=count:sort=count' > /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
# cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
hist:keys=common_pid:vals=hitcount,count:sort=count:size=2048 [active]
# echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
hist:keys=pid:vals=hitcount:ts0=common_timestamp:sort=hitcount:size=2048:clock=global if comm=="cyclictest" [active]
After:
# echo 'hist:key=common_pid.execname:val=count:sort=count' > /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
# cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
hist:keys=common_pid.execname:vals=hitcount,count:sort=count:size=2048 [active]
# echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global if comm=="cyclictest" [active]
Link: http://lkml.kernel.org/r/492bab42ff21806600af98a8ea901af10efbee0c.1524790601.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 10 ++++++++++
1 file changed, 10 insertions(+)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -4867,6 +4867,16 @@ static void hist_field_print(struct seq_
seq_printf(m, "%s", field_name);
} else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
+
+ if (hist_field->flags) {
+ if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) &&
+ !(hist_field->flags & HIST_FIELD_FL_EXPR)) {
+ const char *flags = get_hist_field_flags(hist_field);
+
+ if (flags)
+ seq_printf(m, ".%s", flags);
+ }
+ }
}
static int event_hist_trigger_print(struct seq_file *m,

View File

@ -1,49 +0,0 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:41 +0200
Subject: [PATCH 4/6] ARM: at91: Implement clocksource selection
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Allow selecting and deselecting the PIT clocksource driver so it doesn't
have to be compiled when unused.
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mach-at91/Kconfig | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -106,6 +106,31 @@ config SOC_AT91SAM9
AT91SAM9X35
AT91SAM9XE
+comment "Clocksource driver selection"
+
+config ATMEL_CLOCKSOURCE_PIT
+ bool "Periodic Interval Timer (PIT) support"
+ depends on SOC_AT91SAM9 || SOC_SAMA5
+ default SOC_AT91SAM9 || SOC_SAMA5
+ select ATMEL_PIT
+ help
+ Select this to get a clocksource based on the Atmel Periodic Interval
+ Timer. It has a relatively low resolution and the TC Block clocksource
+ should be preferred.
+
+config ATMEL_CLOCKSOURCE_TCB
+ bool "Timer Counter Blocks (TCB) support"
+ depends on SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 || COMPILE_TEST
+ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5
+ depends on !ATMEL_TCLIB
+ select ATMEL_ARM_TCB_CLKSRC
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate.
+ On platforms with 16-bit counters, two timer channels are combined
+ to make a single 32-bit timer.
+ It can also be used as a clock event device supporting oneshot mode.
+
config HAVE_AT91_UTMI
bool

View File

@ -1,74 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 21:07:24 -0500
Subject: [PATCH 04/17] dput(): consolidate the "do we need to retain it?" into
an inlined helper
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit a338579f2f3d6a15c78f1dc7de4c248b4183fcea
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -626,6 +626,24 @@ static inline struct dentry *lock_parent
return __lock_parent(dentry);
}
+static inline bool retain_dentry(struct dentry *dentry)
+{
+ WARN_ON(d_in_lookup(dentry));
+
+ /* Unreachable? Get rid of it */
+ if (unlikely(d_unhashed(dentry)))
+ return false;
+
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+ return false;
+
+ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
+ if (dentry->d_op->d_delete(dentry))
+ return false;
+ }
+ return true;
+}
+
/*
* Finish off a dentry we've decided to kill.
* dentry->d_lock must be held, returns with it unlocked.
@@ -804,27 +822,13 @@ void dput(struct dentry *dentry)
/* Slow case: now with the dentry lock held */
rcu_read_unlock();
- WARN_ON(d_in_lookup(dentry));
-
- /* Unreachable? Get rid of it */
- if (unlikely(d_unhashed(dentry)))
- goto kill_it;
-
- if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
- goto kill_it;
-
- if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
- if (dentry->d_op->d_delete(dentry))
- goto kill_it;
+ if (likely(retain_dentry(dentry))) {
+ dentry_lru_add(dentry);
+ dentry->d_lockref.count--;
+ spin_unlock(&dentry->d_lock);
+ return;
}
- dentry_lru_add(dentry);
-
- dentry->d_lockref.count--;
- spin_unlock(&dentry->d_lock);
- return;
-
-kill_it:
dentry = dentry_kill(dentry);
if (dentry) {
cond_resched();

View File

@ -1,81 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:53 +0100
Subject: [PATCH 04/29] hrtimer: Cleanup clock argument in
schedule_hrtimeout_range_clock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
schedule_hrtimeout_range_clock() uses an integer for the clock id
instead of the predefined type "clockid_t". The ID of the clock is
indicated in hrtimer code as clock_id. Therefore change the name of
the variable as well to make it consistent.
While at it, clean up the description for the function parameters clock_id
and mode. The clock modes and the clock ids are not restricted as the
comment suggests. Fix the mode description as well for the callers of
schedule_hrtimeout_range_clock().
No functional change.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 2 +-
kernel/time/hrtimer.c | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -462,7 +462,7 @@ extern int schedule_hrtimeout_range(ktim
extern int schedule_hrtimeout_range_clock(ktime_t *expires,
u64 delta,
const enum hrtimer_mode mode,
- int clock);
+ clockid_t clock_id);
extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
/* Soft interrupt function to run the hrtimer queues: */
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1664,12 +1664,12 @@ void __init hrtimers_init(void)
* schedule_hrtimeout_range_clock - sleep until timeout
* @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t)
- * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
+ * @mode: timer mode
+ * @clock_id: timer clock to be used
*/
int __sched
schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
- const enum hrtimer_mode mode, int clock)
+ const enum hrtimer_mode mode, clockid_t clock_id)
{
struct hrtimer_sleeper t;
@@ -1690,7 +1690,7 @@ schedule_hrtimeout_range_clock(ktime_t *
return -EINTR;
}
- hrtimer_init_on_stack(&t.timer, clock, mode);
+ hrtimer_init_on_stack(&t.timer, clock_id, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
hrtimer_init_sleeper(&t, current);
@@ -1712,7 +1712,7 @@ schedule_hrtimeout_range_clock(ktime_t *
* schedule_hrtimeout_range - sleep until timeout
* @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t)
- * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ * @mode: timer mode
*
* Make the current task sleep until the given expiry time has
* elapsed. The routine will return immediately unless
@@ -1751,7 +1751,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_ran
/**
* schedule_hrtimeout - sleep until timeout
* @expires: timeout value (ktime_t)
- * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ * @mode: timer mode
*
* Make the current task sleep until the given expiry time has
* elapsed. The routine will return immediately unless

View File

@ -1,51 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:36 +0100
Subject: [PATCH 04/10] iommu/amd: Split irq_lookup_table out of the
amd_iommu_devtable_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit ea6166f4b83e9cfba1c18f46a764d50045682fe5
The function get_irq_table() reads/writes irq_lookup_table while holding
the amd_iommu_devtable_lock. It also modifies
amd_iommu_dev_table[].data[2].
set_dte_entry() is using amd_iommu_dev_table[].data[0|1] (under the
domain->lock) so it should be okay. The access to the iommu is
serialized with its own (iommu's) lock.
So move get_irq_table() out from under amd_iommu_devtable_lock.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -82,6 +82,7 @@
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
+static DEFINE_SPINLOCK(iommu_table_lock);
/* List of all available dev_data structures */
static LLIST_HEAD(dev_data_list);
@@ -3609,7 +3610,7 @@ static struct irq_remap_table *alloc_irq
unsigned long flags;
u16 alias;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&iommu_table_lock, flags);
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
@@ -3674,7 +3675,7 @@ static struct irq_remap_table *alloc_irq
iommu_completion_wait(iommu);
out_unlock:
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&iommu_table_lock, flags);
return table;
}

View File

@ -1,69 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Mon, 7 May 2018 16:44:57 +0200
Subject: [PATCH] locking/refcount: implement
refcount_dec_and_lock_irqsave()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
There are in-tree users of refcount_dec_and_lock() which must acquire the
spin lock with interrupts disabled. To work around the lack of an irqsave
variant of refcount_dec_and_lock(), they use local_irq_save() at the call
site. This adds extra code and, in some places, creates unnecessarily long
interrupt-disabled sections. These places also need extra treatment for
PREEMPT_RT because the IRQ disabling is disconnected from the lock
function.
Implement the missing irqsave variant of the function.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/refcount.h | 4 +++-
lib/refcount.c | 28 ++++++++++++++++++++++++++++
2 files changed, 31 insertions(+), 1 deletion(-)
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if
extern __must_check bool refcount_dec_not_one(refcount_t *r);
extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
-
+extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
+ spinlock_t *lock,
+ unsigned long *flags);
#endif /* _LINUX_REFCOUNT_H */
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r
}
EXPORT_SYMBOL(refcount_dec_and_lock);
+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ * interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the lock is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interrupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ * otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+ unsigned long *flags)
+{
+ if (refcount_dec_not_one(r))
+ return false;
+
+ spin_lock_irqsave(lock, *flags);
+ if (!refcount_dec_and_test(r)) {
+ spin_unlock_irqrestore(lock, *flags);
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
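For context, a minimal usage sketch of the new helper (not part of the patch; the object type, list and kfree() cleanup are made up for illustration):

#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical object protected by a global lock and a refcount. */
struct obj {
	refcount_t ref;
	struct list_head node;
};

static DEFINE_SPINLOCK(obj_lock);

static void put_obj(struct obj *o)
{
	unsigned long flags;

	/* Replaces the old local_irq_save() + refcount_dec_and_lock() pair. */
	if (!refcount_dec_and_lock_irqsave(&o->ref, &obj_lock, &flags))
		return;

	/* Lock held and interrupts disabled: safe to unlink and free. */
	list_del(&o->node);
	spin_unlock_irqrestore(&obj_lock, flags);
	kfree(o);
}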

View File

@@ -1,45 +0,0 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 22 Dec 2017 15:51:15 +0100
Subject: [PATCH 4/4] timerqueue: Document return values of
timerqueue_add/del()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.8-rt9.tar.xz
The return values of timerqueue_add/del() are not documented in the kernel doc
comment. Add proper documentation.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: rt@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
lib/timerqueue.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 4a720ed4fdaf..0d54bcbc8170 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -33,8 +33,9 @@
* @head: head of timerqueue
* @node: timer node to be added
*
- * Adds the timer node to the timerqueue, sorted by the
- * node's expires value.
+ * Adds the timer node to the timerqueue, sorted by the node's expires
+ * value. Returns true if the newly added timer is the first expiring timer in
+ * the queue.
*/
bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
{
@@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
* @head: head of timerqueue
* @node: timer node to be removed
*
- * Removes the timer node from the timerqueue.
+ * Removes the timer node from the timerqueue. Returns true if the queue is
+ * not empty after the remove.
*/
bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
{
--
2.15.1
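To illustrate the return values this patch documents, a hedged sketch of a caller (the reprogram/stop helpers are hypothetical, declared here only so the sketch is self-contained):

#include <linux/timerqueue.h>

static void clockevent_reprogram(ktime_t expires);	/* hypothetical */
static void stop_event_device(void);			/* hypothetical */

static void queue_timer(struct timerqueue_head *head,
			struct timerqueue_node *node)
{
	if (timerqueue_add(head, node))
		/* New earliest timer: the hardware deadline must change. */
		clockevent_reprogram(node->expires);
}

static void drop_timer(struct timerqueue_head *head,
		       struct timerqueue_node *node)
{
	if (!timerqueue_del(head, node))
		/* Queue is empty now: nothing left to expire. */
		stop_event_device();
}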

View File

@@ -1,189 +0,0 @@
From: Vedang Patel <vedang.patel@intel.com>
Date: Mon, 15 Jan 2018 20:51:38 -0600
Subject: [PATCH 04/37] tracing: Remove code which merges duplicates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
We now have the logic to detect and remove duplicates in the
tracing_map hash table. The code which merges duplicates in the
histogram is redundant now. So, modify this code just to detect
duplicates. The duplication detection code is still kept to ensure
that any rare race condition which might cause duplicates does not go
unnoticed.
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 11 -----
kernel/trace/tracing_map.c | 83 ++-------------------------------------
kernel/trace/tracing_map.h | 7 ---
3 files changed, 6 insertions(+), 95 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -340,16 +340,6 @@ static int hist_trigger_elt_comm_alloc(s
return 0;
}
-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to,
- struct tracing_map_elt *from)
-{
- char *comm_from = from->private_data;
- char *comm_to = to->private_data;
-
- if (comm_from)
- memcpy(comm_to, comm_from, TASK_COMM_LEN + 1);
-}
-
static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
{
char *comm = elt->private_data;
@@ -360,7 +350,6 @@ static void hist_trigger_elt_comm_init(s
static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
.elt_alloc = hist_trigger_elt_comm_alloc,
- .elt_copy = hist_trigger_elt_comm_copy,
.elt_free = hist_trigger_elt_comm_free,
.elt_init = hist_trigger_elt_comm_init,
};
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -847,67 +847,15 @@ create_sort_entry(void *key, struct trac
return sort_entry;
}
-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt)
-{
- struct tracing_map_elt *dup_elt;
- unsigned int i;
-
- dup_elt = tracing_map_elt_alloc(elt->map);
- if (IS_ERR(dup_elt))
- return NULL;
-
- if (elt->map->ops && elt->map->ops->elt_copy)
- elt->map->ops->elt_copy(dup_elt, elt);
-
- dup_elt->private_data = elt->private_data;
- memcpy(dup_elt->key, elt->key, elt->map->key_size);
-
- for (i = 0; i < elt->map->n_fields; i++) {
- atomic64_set(&dup_elt->fields[i].sum,
- atomic64_read(&elt->fields[i].sum));
- dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn;
- }
-
- return dup_elt;
-}
-
-static int merge_dup(struct tracing_map_sort_entry **sort_entries,
- unsigned int target, unsigned int dup)
-{
- struct tracing_map_elt *target_elt, *elt;
- bool first_dup = (target - dup) == 1;
- int i;
-
- if (first_dup) {
- elt = sort_entries[target]->elt;
- target_elt = copy_elt(elt);
- if (!target_elt)
- return -ENOMEM;
- sort_entries[target]->elt = target_elt;
- sort_entries[target]->elt_copied = true;
- } else
- target_elt = sort_entries[target]->elt;
-
- elt = sort_entries[dup]->elt;
-
- for (i = 0; i < elt->map->n_fields; i++)
- atomic64_add(atomic64_read(&elt->fields[i].sum),
- &target_elt->fields[i].sum);
-
- sort_entries[dup]->dup = true;
-
- return 0;
-}
-
-static int merge_dups(struct tracing_map_sort_entry **sort_entries,
+static void detect_dups(struct tracing_map_sort_entry **sort_entries,
int n_entries, unsigned int key_size)
{
unsigned int dups = 0, total_dups = 0;
- int err, i, j;
+ int i;
void *key;
if (n_entries < 2)
- return total_dups;
+ return;
sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
(int (*)(const void *, const void *))cmp_entries_dup, NULL);
@@ -916,30 +864,14 @@ static int merge_dups(struct tracing_map
for (i = 1; i < n_entries; i++) {
if (!memcmp(sort_entries[i]->key, key, key_size)) {
dups++; total_dups++;
- err = merge_dup(sort_entries, i - dups, i);
- if (err)
- return err;
continue;
}
key = sort_entries[i]->key;
dups = 0;
}
- if (!total_dups)
- return total_dups;
-
- for (i = 0, j = 0; i < n_entries; i++) {
- if (!sort_entries[i]->dup) {
- sort_entries[j] = sort_entries[i];
- if (j++ != i)
- sort_entries[i] = NULL;
- } else {
- destroy_sort_entry(sort_entries[i]);
- sort_entries[i] = NULL;
- }
- }
-
- return total_dups;
+ WARN_ONCE(total_dups > 0,
+ "Duplicates detected: %d\n", total_dups);
}
static bool is_key(struct tracing_map *map, unsigned int field_idx)
@@ -1065,10 +997,7 @@ int tracing_map_sort_entries(struct trac
return 1;
}
- ret = merge_dups(entries, n_entries, map->key_size);
- if (ret < 0)
- goto free;
- n_entries -= ret;
+ detect_dups(entries, n_entries, map->key_size);
if (is_key(map, sort_keys[0].field_idx))
cmp_entries_fn = cmp_entries_key;
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -215,11 +215,6 @@ struct tracing_map {
* Element allocation occurs before tracing begins, when the
* tracing_map_init() call is made by client code.
*
- * @elt_copy: At certain points in the lifetime of an element, it may
- * need to be copied. The copy should include a copy of the
- * client-allocated data, which can be copied into the 'to'
- * element from the 'from' element.
- *
* @elt_free: When a tracing_map_elt is freed, this function is called
* and allows client-allocated per-element data to be freed.
*
@@ -233,8 +228,6 @@ struct tracing_map {
*/
struct tracing_map_ops {
int (*elt_alloc)(struct tracing_map_elt *elt);
- void (*elt_copy)(struct tracing_map_elt *to,
- struct tracing_map_elt *from);
void (*elt_free)(struct tracing_map_elt *elt);
void (*elt_clear)(struct tracing_map_elt *elt);
void (*elt_init)(struct tracing_map_elt *elt);
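The detection that remains boils down to comparing neighbouring keys once the entries are sorted; a simplified stand-alone sketch of that idea (not the kernel code itself):

#include <linux/string.h>
#include <linux/types.h>

/* Sketch only: counts duplicates in an array of keys already sorted by key. */
static unsigned int count_sorted_dups(void **keys, int n_entries,
				      size_t key_size)
{
	unsigned int total_dups = 0;
	int i;

	for (i = 1; i < n_entries; i++)
		if (!memcmp(keys[i], keys[i - 1], key_size))
			total_dups++;

	return total_dups;
}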

View File

@@ -1,27 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Fri, 22 Sep 2017 14:58:18 -0500
Subject: [PATCH 04/42] tracing: Remove lookups from tracing_map hitcount
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Lookups inflate the hitcount, making it essentially useless. Only
inserts and updates should really affect the hitcount anyway, so
explicitly filter lookups out.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -428,7 +428,8 @@ static inline struct tracing_map_elt *
if (test_key && test_key == key_hash && entry->val &&
keys_match(key, entry->val->key, map->key_size)) {
- atomic64_inc(&map->hits);
+ if (!lookup_only)
+ atomic64_inc(&map->hits);
return entry->val;
}

View File

@@ -1,37 +0,0 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 16 May 2018 09:36:46 -0400
Subject: [PATCH 4/5] tracing: Uninitialized variable in
create_tracing_map_fields()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
From: Dan Carpenter <dan.carpenter@oracle.com>
[ commit b28d7b2dc27f0eef1ae608b49d6860f2463910f1 ]
Smatch complains that idx can be used uninitialized when we check if
(idx < 0). It has to be the first iteration through the loop and the
HIST_FIELD_FL_STACKTRACE bit has to be clear and the HIST_FIELD_FL_VAR
bit has to be set to reach the bug.
Link: http://lkml.kernel.org/r/20180328114815.GC29050@mwanda
Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers")
Acked-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -4412,7 +4412,7 @@ static int create_tracing_map_fields(str
struct tracing_map *map = hist_data->map;
struct ftrace_event_field *field;
struct hist_field *hist_field;
- int i, idx;
+ int i, idx = 0;
for_each_hist_field(i, hist_data) {
hist_field = hist_data->fields[i];

View File

@@ -1,35 +0,0 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:42 +0200
Subject: [PATCH 5/6] ARM: configs: at91: use new TCB timer driver
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Unselecting ATMEL_TCLIB switches the TCB timer driver from tcb_clksrc to
timer-atmel-tcb.
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 -
arch/arm/configs/sama5_defconfig | 1 -
2 files changed, 2 deletions(-)
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -64,7 +64,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -75,7 +75,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_EEPROM_AT24=y
CONFIG_SCSI=y

View File

@@ -1,31 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 4 Apr 2018 11:43:56 +0200
Subject: [PATCH] bdi: Use irqsave variant of refcount_dec_and_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The irqsave variant of refcount_dec_and_lock() handles the irqsave/restore
when taking/releasing the spin lock. With this variant the calls to
local_irq_save/restore are no longer required.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/backing-dev.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -495,11 +495,8 @@ void wb_congested_put(struct bdi_writeba
{
unsigned long flags;
- local_irq_save(flags);
- if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
- local_irq_restore(flags);
+ if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
return;
- }
/* bdi might already have been destroyed leaving @congested unlinked */
if (congested->__bdi) {

View File

@@ -1,55 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 Mar 2018 21:37:31 -0500
Subject: [PATCH 05/17] handle move to LRU in retain_dentry()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 62d9956cefe6ecc4b43a7fae37af78ba7adaceaa
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -440,17 +440,6 @@ static void d_lru_shrink_move(struct lis
list_lru_isolate_move(lru, &dentry->d_lru, list);
}
-/*
- * dentry_lru_(add|del)_list) must be called with d_lock held.
- */
-static void dentry_lru_add(struct dentry *dentry)
-{
- if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
- d_lru_add(dentry);
- else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
- dentry->d_flags |= DCACHE_REFERENCED;
-}
-
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
@@ -641,6 +630,12 @@ static inline bool retain_dentry(struct
if (dentry->d_op->d_delete(dentry))
return false;
}
+ /* retain; LRU fodder */
+ dentry->d_lockref.count--;
+ if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+ d_lru_add(dentry);
+ else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
+ dentry->d_flags |= DCACHE_REFERENCED;
return true;
}
@@ -823,8 +818,6 @@ void dput(struct dentry *dentry)
rcu_read_unlock();
if (likely(retain_dentry(dentry))) {
- dentry_lru_add(dentry);
- dentry->d_lockref.count--;
spin_unlock(&dentry->d_lock);
return;
}

View File

@@ -1,61 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:54 +0100
Subject: [PATCH 05/29] hrtimer: Fix hrtimer function description
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
hrtimer_start[_range_ns]() starts a timer reliably on the current CPU only
when HRTIMER_MODE_PINNED is set. Furthermore, the HRTIMER_MODE_PINNED mode
is not considered when a hrtimer is initialized.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 6 +++---
kernel/time/hrtimer.c | 9 +++++----
2 files changed, 8 insertions(+), 7 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -361,11 +361,11 @@ extern void hrtimer_start_range_ns(struc
u64 range_ns, const enum hrtimer_mode mode);
/**
- * hrtimer_start - (re)start an hrtimer on the current CPU
+ * hrtimer_start - (re)start an hrtimer
* @timer: the timer to be added
* @tim: expiry time
- * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
- * relative (HRTIMER_MODE_REL)
+ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
*/
static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
const enum hrtimer_mode mode)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -926,12 +926,12 @@ static inline ktime_t hrtimer_update_low
}
/**
- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
+ * hrtimer_start_range_ns - (re)start an hrtimer
* @timer: the timer to be added
* @tim: expiry time
* @delta_ns: "slack" range for the timer
- * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
- * relative (HRTIMER_MODE_REL)
+ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
*/
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
u64 delta_ns, const enum hrtimer_mode mode)
@@ -1109,7 +1109,8 @@ static void __hrtimer_init(struct hrtime
* hrtimer_init - initialize a timer to the given clock
* @timer: the timer to be initialized
* @clock_id: the clock to be used
- * @mode: timer mode abs/rel
+ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL); pinned is not considered here!
*/
void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
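As a reference for the updated kernel-doc, a minimal sketch of starting a pinned timer (the callback and the 100 ms timeout are illustrative only):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
	/* Illustrative callback: do nothing and don't rearm. */
	return HRTIMER_NORESTART;
}

static void start_pinned_timer(struct hrtimer *t)
{
	/* HRTIMER_MODE_PINNED is not considered at init time ... */
	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	t->function = my_timer_fn;
	/* ... it only takes effect when the timer is started. */
	hrtimer_start(t, ms_to_ktime(100), HRTIMER_MODE_REL_PINNED);
}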

View File

@@ -1,95 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:37 +0100
Subject: [PATCH 05/10] iommu/amd: Remove the special case from
alloc_irq_table()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit fde65dd3d3096e8f6ecc7bbe544eb91f4220772c
alloc_irq_table() has a special ioapic argument. If set, it will
pre-allocate / reserve the first 32 indexes. The argument is true only
once, and alloc_irq_table() becomes a little simpler if the special bits
are extracted to the caller.
The caller of irq_remapping_alloc() holds irq_domain_mutex, so the
initialization via iommu->irte_ops->set_allocated() should not race
against other users.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 34 ++++++++++++++++++++--------------
1 file changed, 20 insertions(+), 14 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3603,7 +3603,7 @@ static struct irq_remap_table *get_irq_t
return table;
}
-static struct irq_remap_table *alloc_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *alloc_irq_table(u16 devid)
{
struct irq_remap_table *table = NULL;
struct amd_iommu *iommu;
@@ -3637,10 +3637,6 @@ static struct irq_remap_table *alloc_irq
/* Initialize table spin-lock */
raw_spin_lock_init(&table->lock);
- if (ioapic)
- /* Keep the first 32 indexes free for IOAPIC interrupts */
- table->min_index = 32;
-
table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
if (!table->table) {
kfree(table);
@@ -3655,12 +3651,6 @@ static struct irq_remap_table *alloc_irq
memset(table->table, 0,
(MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
- if (ioapic) {
- int i;
-
- for (i = 0; i < 32; ++i)
- iommu->irte_ops->set_allocated(table, i);
- }
irq_lookup_table[devid] = table;
set_dte_irq_entry(devid, table);
@@ -3690,7 +3680,7 @@ static int alloc_irq_index(u16 devid, in
if (!iommu)
return -ENODEV;
- table = alloc_irq_table(devid, false);
+ table = alloc_irq_table(devid);
if (!table)
return -ENODEV;
@@ -4116,10 +4106,26 @@ static int irq_remapping_alloc(struct ir
return ret;
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
- if (alloc_irq_table(devid, true))
+ struct irq_remap_table *table;
+ struct amd_iommu *iommu;
+
+ table = alloc_irq_table(devid);
+ if (table) {
+ if (!table->min_index) {
+ /*
+ * Keep the first 32 indexes free for IOAPIC
+ * interrupts.
+ */
+ table->min_index = 32;
+ iommu = amd_iommu_rlookup_table[devid];
+ for (i = 0; i < 32; ++i)
+ iommu->irte_ops->set_allocated(table, i);
+ }
+ WARN_ON(table->min_index != 32);
index = info->ioapic_pin;
- else
+ } else {
ret = -ENOMEM;
+ }
} else {
bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);

View File

@@ -1,129 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:39 -0600
Subject: [PATCH 05/37] ring-buffer: Add interface for setting absolute time
stamps
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Define a new function, tracing_set_time_stamp_abs(), which can be used
to enable or disable the use of absolute timestamps rather than time
deltas for a trace array.
Only the interface is added here; a subsequent patch will add the
underlying implementation.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 2 ++
kernel/trace/ring_buffer.c | 11 +++++++++++
kernel/trace/trace.c | 33 ++++++++++++++++++++++++++++++++-
kernel/trace/trace.h | 3 +++
4 files changed, 48 insertions(+), 1 deletion(-)
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -181,6 +181,8 @@ void ring_buffer_normalize_time_stamp(st
int cpu, u64 *ts);
void ring_buffer_set_clock(struct ring_buffer *buffer,
u64 (*clock)(void));
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
size_t ring_buffer_page_len(void *page);
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -489,6 +489,7 @@ struct ring_buffer {
u64 (*clock)(void);
struct rb_irq_work irq_work;
+ bool time_stamp_abs;
};
struct ring_buffer_iter {
@@ -1383,6 +1384,16 @@ void ring_buffer_set_clock(struct ring_b
buffer->clock = clock;
}
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
+{
+ buffer->time_stamp_abs = abs;
+}
+
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
+{
+ return buffer->time_stamp_abs;
+}
+
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
static inline unsigned long rb_page_entries(struct buffer_page *bpage)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2269,7 +2269,7 @@ trace_event_buffer_lock_reserve(struct r
*current_rb = trace_file->tr->trace_buffer.buffer;
- if ((trace_file->flags &
+ if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
(EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
@@ -6286,6 +6286,37 @@ static int tracing_clock_open(struct ino
return ret;
}
+
+int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+{
+ int ret = 0;
+
+ mutex_lock(&trace_types_lock);
+
+ if (abs && tr->time_stamp_abs_ref++)
+ goto out;
+
+ if (!abs) {
+ if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (--tr->time_stamp_abs_ref)
+ goto out;
+ }
+
+ ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->max_buffer.buffer)
+ ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
+#endif
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
struct ftrace_buffer_info {
struct trace_iterator iter;
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -273,6 +273,7 @@ struct trace_array {
/* function tracing enabled */
int function_enabled;
#endif
+ int time_stamp_abs_ref;
};
enum {
@@ -286,6 +287,8 @@ extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
extern void trace_array_put(struct trace_array *tr);
+extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
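A hedged sketch of how the new interface is meant to be consumed once the implementation lands (the wrapper names are made up; later patches in this series wire the call up from the hist trigger code):

/* Sketch only: these wrappers are hypothetical, the API is from this patch. */
static int enable_abs_timestamps(struct trace_array *tr)
{
	/* Takes a reference; the ring buffer switches to absolute stamps. */
	return tracing_set_time_stamp_abs(tr, true);
}

static void disable_abs_timestamps(struct trace_array *tr)
{
	/* Drops the reference; deltas are used again once it hits zero. */
	tracing_set_time_stamp_abs(tr, false);
}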

View File

@@ -1,30 +0,0 @@
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Mar 2018 14:37:36 +0300
Subject: [PATCH 5/5] tracing: Fix a potential NULL dereference
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
We forgot to set the error code on this path so we return ERR_PTR(0)
which is NULL. It results in a NULL dereference in the caller.
Link: http://lkml.kernel.org/r/20180323113735.GC28518@mwanda
Fixes: 100719dcef44 ("tracing: Add simple expression support to hist triggers")
Acked-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 5e4cf2bf6d1c198a90ccc0df5ffd8e0d4ea36b48)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2777,6 +2777,7 @@ static struct hist_field *parse_expr(str
expr->fn = hist_field_plus;
break;
default:
+ ret = -EINVAL;
goto free;
}

View File

@@ -1,25 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Fri, 22 Sep 2017 14:58:19 -0500
Subject: [PATCH 05/42] tracing: Increase tracing map KEYS_MAX size
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The current default for the number of subkeys in a compound key is 2,
which is too restrictive. Increase it to a more realistic value of 3.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -6,7 +6,7 @@
#define TRACING_MAP_BITS_MAX 17
#define TRACING_MAP_BITS_MIN 7
-#define TRACING_MAP_KEYS_MAX 2
+#define TRACING_MAP_KEYS_MAX 3
#define TRACING_MAP_VALS_MAX 3
#define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \
TRACING_MAP_VALS_MAX)

View File

@@ -1,36 +0,0 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:43 +0200
Subject: [PATCH 6/6] ARM: configs: at91: unselect PIT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The PIT is no longer required to boot successfully and may actually cause
harm when preempt-rt is used, because the PIT interrupt is shared.
Disable it so the TCB clocksource is used.
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 +
arch/arm/configs/sama5_defconfig | 1 +
2 files changed, 2 insertions(+)
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -19,6 +19,7 @@ CONFIG_ARCH_MULTI_V5=y
CONFIG_ARCH_AT91=y
CONFIG_SOC_AT91RM9200=y
CONFIG_SOC_AT91SAM9=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y
CONFIG_SOC_SAMA5D2=y
CONFIG_SOC_SAMA5D3=y
CONFIG_SOC_SAMA5D4=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0

View File

@@ -1,73 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 21:25:42 -0500
Subject: [PATCH 06/17] get rid of trylock loop around dentry_kill()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit f657a666fd1b1b9fe59963943c74c245ae66f4cc
In case the trylock in there fails, deal with it directly in
dentry_kill(). Note that in cases when we drop and retake
->d_lock, we need to recheck whether to retain the dentry.
Another thing is that dropping/retaking ->d_lock might have
ended up with a negative dentry turning into a positive one; that,
of course, can happen only once...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 34 +++++++++++++++++++++++++++-------
1 file changed, 27 insertions(+), 7 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -651,23 +651,43 @@ static struct dentry *dentry_kill(struct
struct dentry *parent = NULL;
if (inode && unlikely(!spin_trylock(&inode->i_lock)))
- goto failed;
+ goto slow_positive;
if (!IS_ROOT(dentry)) {
parent = dentry->d_parent;
if (unlikely(!spin_trylock(&parent->d_lock))) {
- if (inode)
- spin_unlock(&inode->i_lock);
- goto failed;
+ parent = __lock_parent(dentry);
+ if (likely(inode || !dentry->d_inode))
+ goto got_locks;
+ /* negative that became positive */
+ if (parent)
+ spin_unlock(&parent->d_lock);
+ inode = dentry->d_inode;
+ goto slow_positive;
}
}
-
__dentry_kill(dentry);
return parent;
-failed:
+slow_positive:
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ parent = lock_parent(dentry);
+got_locks:
+ if (unlikely(dentry->d_lockref.count != 1)) {
+ dentry->d_lockref.count--;
+ } else if (likely(!retain_dentry(dentry))) {
+ __dentry_kill(dentry);
+ return parent;
+ }
+ /* we are keeping it, after all */
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ if (parent)
+ spin_unlock(&parent->d_lock);
spin_unlock(&dentry->d_lock);
- return dentry; /* try again with same dentry */
+ return NULL;
}
/*

View File

@@ -1,41 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:55 +0100
Subject: [PATCH 06/29] hrtimer: Ensure POSIX compliance (relative
CLOCK_REALTIME hrtimers)
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The POSIX specification defines that relative CLOCK_REALTIME timers are not
affected by clock modifications. Those timers have to use CLOCK_MONOTONIC
to ensure POSIX compliance.
The introduction of the additional mode HRTIMER_MODE_PINNED broke this
requirement for pinned timers. There is no user space visible impact
because user space timers are not using the pinned mode, but for
consistency reasons this needs to be fixed.
Check whether the mode has the HRTIMER_MODE_REL bit set instead of
comparing with HRTIMER_MODE_ABS.
Fixes: 597d0275736d ("timers: Framework for identifying pinned timers")
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1097,7 +1097,12 @@ static void __hrtimer_init(struct hrtime
cpu_base = raw_cpu_ptr(&hrtimer_bases);
- if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
+ /*
+ * Posix magic: Relative CLOCK_REALTIME timers are not affected by
+ * clock modifications, so they need to become CLOCK_MONOTONIC to
+ * ensure Posix compliance.
+ */
+ if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
clock_id = CLOCK_MONOTONIC;
base = hrtimer_clockid_to_base(clock_id);

View File

@@ -1,53 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:38 +0100
Subject: [PATCH 06/10] iommu/amd: Use `table' instead `irt' as variable name
in amd_iommu_update_ga()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 4fde541c9dc114c5b448ad34b0286fe8b7c550f1
The variable of type struct irq_remap_table is always named `table'
except in amd_iommu_update_ga() where it is called `irt'. Make it
consistent and name it `table' there as well.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4391,7 +4391,7 @@ int amd_iommu_update_ga(int cpu, bool is
{
unsigned long flags;
struct amd_iommu *iommu;
- struct irq_remap_table *irt;
+ struct irq_remap_table *table;
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
int devid = ir_data->irq_2_irte.devid;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
@@ -4405,11 +4405,11 @@ int amd_iommu_update_ga(int cpu, bool is
if (!iommu)
return -ENODEV;
- irt = get_irq_table(devid);
- if (!irt)
+ table = get_irq_table(devid);
+ if (!table)
return -ENODEV;
- raw_spin_lock_irqsave(&irt->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
if (ref->lo.fields_vapic.guest_mode) {
if (cpu >= 0)
@@ -4418,7 +4418,7 @@ int amd_iommu_update_ga(int cpu, bool is
barrier();
}
- raw_spin_unlock_irqrestore(&irt->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);

View File

@@ -1,319 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:40 -0600
Subject: [PATCH 06/37] ring-buffer: Redefine the unimplemented
RINGBUF_TYPE_TIME_STAMP
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
RINGBUF_TYPE_TIME_STAMP is defined but not used, and from what I can
gather was reserved for something like an absolute timestamp feature
for the ring buffer, if not a complete replacement of the current
time_delta scheme.
This code redefines RINGBUF_TYPE_TIME_STAMP to implement absolute time
stamps. Another way to look at it is that it essentially forces
extended time_deltas for all events.
The motivation for doing this is to enable time_deltas that aren't
dependent on previous events in the ring buffer, making it feasible to
use the ring_buffer_event timestamps in a more random-access way, for
purposes other than serial event printing.
To set/reset this mode, use tracing_set_time_stamp_abs() from the
previous interface patch.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 12 ++---
kernel/trace/ring_buffer.c | 104 ++++++++++++++++++++++++++++++++------------
2 files changed, 83 insertions(+), 33 deletions(-)
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -37,10 +37,12 @@ struct ring_buffer_event {
* array[0] = time delta (28 .. 59)
* size = 8 bytes
*
- * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
- * array[0] = tv_nsec
- * array[1..2] = tv_sec
- * size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp
+ * Same format as TIME_EXTEND except that the
+ * value is an absolute timestamp, not a delta
+ * event.time_delta contains bottom 27 bits
+ * array[0] = top (28 .. 59) bits
+ * size = 8 bytes
*
* <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
* Data record
@@ -57,12 +59,12 @@ enum ring_buffer_type {
RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
RINGBUF_TYPE_PADDING,
RINGBUF_TYPE_TIME_EXTEND,
- /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
RINGBUF_TYPE_TIME_STAMP,
};
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
/*
* ring_buffer_discard_commit will remove an event that has not
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -42,6 +42,8 @@ int ring_buffer_print_entry_header(struc
RINGBUF_TYPE_PADDING);
trace_seq_printf(s, "\ttime_extend : type == %d\n",
RINGBUF_TYPE_TIME_EXTEND);
+ trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+ RINGBUF_TYPE_TIME_STAMP);
trace_seq_printf(s, "\tdata max type_len == %d\n",
RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
@@ -141,12 +143,15 @@ int ring_buffer_print_entry_header(struc
enum {
RB_LEN_TIME_EXTEND = 8,
- RB_LEN_TIME_STAMP = 16,
+ RB_LEN_TIME_STAMP = 8,
};
#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+#define extended_time(event) \
+ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -210,7 +215,7 @@ rb_event_ts_length(struct ring_buffer_ev
{
unsigned len = 0;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ if (extended_time(event)) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
@@ -232,7 +237,7 @@ unsigned ring_buffer_event_length(struct
{
unsigned length;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
length = rb_event_length(event);
@@ -249,7 +254,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_leng
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
@@ -276,6 +281,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event. This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+ u64 ts;
+
+ ts = event->array[0];
+ ts <<= TS_SHIFT;
+ ts += event->time_delta;
+
+ return ts;
+}
+
/* Flag when events were overwritten */
#define RB_MISSED_EVENTS (1 << 31)
/* Missed count stored at end */
@@ -2225,12 +2251,15 @@ rb_move_tail(struct ring_buffer_per_cpu
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
{
- event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+ if (abs)
+ event->type_len = RINGBUF_TYPE_TIME_STAMP;
+ else
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
- /* Not the first event on the page? */
- if (rb_event_index(event)) {
+ /* Not the first event on the page, or not delta? */
+ if (abs || rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
@@ -2273,7 +2302,9 @@ rb_update_event(struct ring_buffer_per_c
* add it to the start of the resevered space.
*/
if (unlikely(info->add_timestamp)) {
- event = rb_add_time_stamp(event, delta);
+ bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+ event = rb_add_time_stamp(event, info->delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
@@ -2461,7 +2492,7 @@ static __always_inline void rb_end_commi
static inline void rb_event_discard(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */
@@ -2505,10 +2536,11 @@ rb_update_write_stamp(struct ring_buffer
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->write_stamp += delta;
+ } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->write_stamp = delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
@@ -2661,7 +2693,7 @@ static struct ring_buffer_event *
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
- if (!tail)
+ if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
info->delta = 0;
/* See if we shot pass the end of this buffer page */
@@ -2739,8 +2771,11 @@ rb_reserve_next_event(struct ring_buffer
/* make sure this diff is calculated here */
barrier();
- /* Did the write stamp get updated already? */
- if (likely(info.ts >= cpu_buffer->write_stamp)) {
+ if (ring_buffer_time_stamp_abs(buffer)) {
+ info.delta = info.ts;
+ rb_handle_timestamp(cpu_buffer, &info);
+ } else /* Did the write stamp get updated already? */
+ if (likely(info.ts >= cpu_buffer->write_stamp)) {
info.delta = diff;
if (unlikely(test_time_stamp(info.delta)))
rb_handle_timestamp(cpu_buffer, &info);
@@ -3422,14 +3457,13 @@ rb_update_read_stamp(struct ring_buffer_
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
@@ -3453,14 +3487,13 @@ rb_update_iter_read_stamp(struct ring_bu
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
iter->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ iter->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
@@ -3684,6 +3717,8 @@ rb_buffer_peek(struct ring_buffer_per_cp
struct buffer_page *reader;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
again:
/*
* We repeat when a time extend is encountered.
@@ -3720,12 +3755,17 @@ rb_buffer_peek(struct ring_buffer_per_cp
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_reader(cpu_buffer);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = cpu_buffer->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
@@ -3750,6 +3790,9 @@ rb_iter_peek(struct ring_buffer_iter *it
struct ring_buffer_event *event;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
+
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
@@ -3802,12 +3845,17 @@ rb_iter_peek(struct ring_buffer_iter *it
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_iter(iter);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = iter->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(buffer,
cpu_buffer->cpu, ts);
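To make the new on-ring encoding concrete, a stand-alone sketch of the 27-bit split (the sk_ names are local to this sketch; the kernel's own equivalents are TS_SHIFT/TS_MASK and ring_buffer_event_time_stamp()):

#include <linux/types.h>

#define SK_TS_SHIFT	27
#define SK_TS_MASK	((1ULL << SK_TS_SHIFT) - 1)

/* Split an absolute 64-bit timestamp into the two event fields ... */
static void sk_encode_ts(u64 ts, u32 *time_delta, u32 *array0)
{
	*time_delta = ts & SK_TS_MASK;		/* bottom 27 bits */
	*array0 = ts >> SK_TS_SHIFT;		/* remaining upper bits */
}

/* ... and reassemble it, as ring_buffer_event_time_stamp() does. */
static u64 sk_decode_ts(u32 time_delta, u32 array0)
{
	return ((u64)array0 << SK_TS_SHIFT) + time_delta;
}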

View File

@@ -1,318 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Fri, 22 Sep 2017 14:58:20 -0500
Subject: [PATCH 06/42] tracing: Make traceprobe parsing code reusable
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
traceprobe_probes_write() and traceprobe_command() actually contain
nothing that ties them to kprobes - the code is generically useful for
similar types of parsing elsewhere, so separate it out and move it to
trace.c/trace.h.
Other than moving it, the only change is in naming:
traceprobe_probes_write() becomes trace_parse_run_command() and
traceprobe_command() becomes trace_run_command().
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace.c | 86 ++++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace.h | 7 +++
kernel/trace/trace_kprobe.c | 18 ++++-----
kernel/trace/trace_probe.c | 86 --------------------------------------------
kernel/trace/trace_probe.h | 7 ---
kernel/trace/trace_uprobe.c | 2 -
6 files changed, 103 insertions(+), 103 deletions(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8265,6 +8265,92 @@ void ftrace_dump(enum ftrace_dump_mode o
}
EXPORT_SYMBOL_GPL(ftrace_dump);
+int trace_run_command(const char *buf, int (*createfn)(int, char **))
+{
+ char **argv;
+ int argc, ret;
+
+ argc = 0;
+ ret = 0;
+ argv = argv_split(GFP_KERNEL, buf, &argc);
+ if (!argv)
+ return -ENOMEM;
+
+ if (argc)
+ ret = createfn(argc, argv);
+
+ argv_free(argv);
+
+ return ret;
+}
+
+#define WRITE_BUFSIZE 4096
+
+ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos,
+ int (*createfn)(int, char **))
+{
+ char *kbuf, *buf, *tmp;
+ int ret = 0;
+ size_t done = 0;
+ size_t size;
+
+ kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ while (done < count) {
+ size = count - done;
+
+ if (size >= WRITE_BUFSIZE)
+ size = WRITE_BUFSIZE - 1;
+
+ if (copy_from_user(kbuf, buffer + done, size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ kbuf[size] = '\0';
+ buf = kbuf;
+ do {
+ tmp = strchr(buf, '\n');
+ if (tmp) {
+ *tmp = '\0';
+ size = tmp - buf + 1;
+ } else {
+ size = strlen(buf);
+ if (done + size < count) {
+ if (buf != kbuf)
+ break;
+ /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
+ pr_warn("Line length is too long: Should be less than %d\n",
+ WRITE_BUFSIZE - 2);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ done += size;
+
+ /* Remove comments */
+ tmp = strchr(buf, '#');
+
+ if (tmp)
+ *tmp = '\0';
+
+ ret = trace_run_command(buf, createfn);
+ if (ret)
+ goto out;
+ buf += size;
+
+ } while (done < count);
+ }
+ ret = done;
+
+out:
+ kfree(kbuf);
+
+ return ret;
+}
+
__init static int tracer_alloc_buffers(void)
{
int ring_buf_size;
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1755,6 +1755,13 @@ void trace_printk_start_comm(void);
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
+#define MAX_EVENT_NAME_LEN 64
+
+extern int trace_run_command(const char *buf, int (*createfn)(int, char**));
+extern ssize_t trace_parse_run_command(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos,
+ int (*createfn)(int, char**));
+
/*
* Normal trace_printk() and friends allocates special buffers
* to do the manipulation, as well as saves the print formats
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -907,8 +907,8 @@ static int probes_open(struct inode *ino
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- return traceprobe_probes_write(file, buffer, count, ppos,
- create_trace_kprobe);
+ return trace_parse_run_command(file, buffer, count, ppos,
+ create_trace_kprobe);
}
static const struct file_operations kprobe_events_ops = {
@@ -1433,9 +1433,9 @@ static __init int kprobe_trace_self_test
pr_info("Testing kprobe tracing: ");
- ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
- "$stack $stack0 +0($stack)",
- create_trace_kprobe);
+ ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
+ "$stack $stack0 +0($stack)",
+ create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function entry.\n");
warn++;
@@ -1455,8 +1455,8 @@ static __init int kprobe_trace_self_test
}
}
- ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
- "$retval", create_trace_kprobe);
+ ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
+ "$retval", create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function return.\n");
warn++;
@@ -1526,13 +1526,13 @@ static __init int kprobe_trace_self_test
disable_trace_kprobe(tk, file);
}
- ret = traceprobe_command("-:testprobe", create_trace_kprobe);
+ ret = trace_run_command("-:testprobe", create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
- ret = traceprobe_command("-:testprobe2", create_trace_kprobe);
+ ret = trace_run_command("-:testprobe2", create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -623,92 +623,6 @@ void traceprobe_free_probe_arg(struct pr
kfree(arg->comm);
}
-int traceprobe_command(const char *buf, int (*createfn)(int, char **))
-{
- char **argv;
- int argc, ret;
-
- argc = 0;
- ret = 0;
- argv = argv_split(GFP_KERNEL, buf, &argc);
- if (!argv)
- return -ENOMEM;
-
- if (argc)
- ret = createfn(argc, argv);
-
- argv_free(argv);
-
- return ret;
-}
-
-#define WRITE_BUFSIZE 4096
-
-ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos,
- int (*createfn)(int, char **))
-{
- char *kbuf, *buf, *tmp;
- int ret = 0;
- size_t done = 0;
- size_t size;
-
- kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
- if (!kbuf)
- return -ENOMEM;
-
- while (done < count) {
- size = count - done;
-
- if (size >= WRITE_BUFSIZE)
- size = WRITE_BUFSIZE - 1;
-
- if (copy_from_user(kbuf, buffer + done, size)) {
- ret = -EFAULT;
- goto out;
- }
- kbuf[size] = '\0';
- buf = kbuf;
- do {
- tmp = strchr(buf, '\n');
- if (tmp) {
- *tmp = '\0';
- size = tmp - buf + 1;
- } else {
- size = strlen(buf);
- if (done + size < count) {
- if (buf != kbuf)
- break;
- /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
- pr_warn("Line length is too long: Should be less than %d\n",
- WRITE_BUFSIZE - 2);
- ret = -EINVAL;
- goto out;
- }
- }
- done += size;
-
- /* Remove comments */
- tmp = strchr(buf, '#');
-
- if (tmp)
- *tmp = '\0';
-
- ret = traceprobe_command(buf, createfn);
- if (ret)
- goto out;
- buf += size;
-
- } while (done < count);
- }
- ret = done;
-
-out:
- kfree(kbuf);
-
- return ret;
-}
-
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
bool is_return)
{
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -42,7 +42,6 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
-#define MAX_EVENT_NAME_LEN 64
#define MAX_STRING_SIZE PATH_MAX
/* Reserved field names */
@@ -356,12 +355,6 @@ extern void traceprobe_free_probe_arg(st
extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
-extern ssize_t traceprobe_probes_write(struct file *file,
- const char __user *buffer, size_t count, loff_t *ppos,
- int (*createfn)(int, char**));
-
-extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
-
/* Sum up total data length for dynamic arraies (strings) */
static nokprobe_inline int
__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -651,7 +651,7 @@ static int probes_open(struct inode *ino
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
+ return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe);
}
static const struct file_operations uprobe_events_ops = {
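For orientation, a hedged sketch of the callback shape that trace_run_command()/trace_parse_run_command() expect (the callback body and the probe string are purely illustrative):

#include <linux/printk.h>

/* Sketch only: a trivial createfn that logs the tokens it is handed. */
static int demo_createfn(int argc, char **argv)
{
	int i;

	for (i = 0; i < argc; i++)
		pr_info("arg[%d] = %s\n", i, argv[i]);

	return 0;
}

/* Each whitespace-separated line is split and passed to the callback, e.g.: */
/* trace_run_command("p:myprobe do_sys_open", demo_createfn); */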

View File

@@ -1,31 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 4 Apr 2018 11:43:57 +0200
Subject: [PATCH] userns: Use irqsave variant of refcount_dec_and_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The irqsave variant of refcount_dec_and_lock() handles the irqsave/restore
when taking/releasing the spin lock. With this variant the calls to
local_irq_save/restore are no longer required.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/user.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -169,11 +169,8 @@ void free_uid(struct user_struct *up)
if (!up)
return;
- local_irq_save(flags);
- if (refcount_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
free_user(up, flags);
- else
- local_irq_restore(flags);
}
struct user_struct *alloc_uid(kuid_t uid)

View File

@@ -1,150 +0,0 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Fri, 23 Feb 2018 00:50:24 +0100
Subject: [PATCH 07/17] fs/dcache: Avoid a try_lock loop in
shrink_dentry_list()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 8f04da2adbdffed8dc4b2feb00ec3b3d84683885
shrink_dentry_list() holds dentry->d_lock and needs to acquire
dentry->d_inode->i_lock. This cannot be done with a spin_lock()
operation because it's the reverse of the regular lock order.
To avoid ABBA deadlocks it is done with a trylock loop.
Trylock loops are problematic in two scenarios:
1) PREEMPT_RT converts spinlocks to 'sleeping' spinlocks, which are
preemptible. As a consequence the i_lock holder can be preempted
by a higher priority task. If that task executes the trylock loop
it will do so forever and live lock.
2) In virtual machines trylock loops are problematic as well. The
VCPU on which the i_lock holder runs can be scheduled out and a
task on a different VCPU can loop for a whole time slice. In the
worst case this can lead to starvation. Commits 47be61845c77
("fs/dcache.c: avoid soft-lockup in dput()") and 046b961b45f9
("shrink_dentry_list(): take parent's d_lock earlier") are
addressing exactly those symptoms.
Avoid the trylock loop by using dentry_kill(). When pruning ancestors,
the same code applies that is used to kill a dentry in dput(). This
also has the benefit that the locking order is now the same. First
the inode is locked, then the parent.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 41 ++++++++++-------------------------------
1 file changed, 10 insertions(+), 31 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -992,9 +992,11 @@ EXPORT_SYMBOL(d_prune_aliases);
/*
* Lock a dentry from shrink list.
+ * Called under rcu_read_lock() and dentry->d_lock; the former
+ * guarantees that nothing we access will be freed under us.
* Note that dentry is *not* protected from concurrent dentry_kill(),
- * d_delete(), etc. It is protected from freeing (by the fact of
- * being on a shrink list), but everything else is fair game.
+ * d_delete(), etc.
+ *
* Return false if dentry has been disrupted or grabbed, leaving
* the caller to kick it off-list. Otherwise, return true and have
* that dentry's inode and parent both locked.
@@ -1009,7 +1011,6 @@ static bool shrink_lock_dentry(struct de
inode = dentry->d_inode;
if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
- rcu_read_lock(); /* to protect inode */
spin_unlock(&dentry->d_lock);
spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
@@ -1018,16 +1019,13 @@ static bool shrink_lock_dentry(struct de
/* changed inode means that somebody had grabbed it */
if (unlikely(inode != dentry->d_inode))
goto out;
- rcu_read_unlock();
}
parent = dentry->d_parent;
if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
return true;
- rcu_read_lock(); /* to protect parent */
spin_unlock(&dentry->d_lock);
- parent = READ_ONCE(dentry->d_parent);
spin_lock(&parent->d_lock);
if (unlikely(parent != dentry->d_parent)) {
spin_unlock(&parent->d_lock);
@@ -1035,15 +1033,12 @@ static bool shrink_lock_dentry(struct de
goto out;
}
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (likely(!dentry->d_lockref.count)) {
- rcu_read_unlock();
+ if (likely(!dentry->d_lockref.count))
return true;
- }
spin_unlock(&parent->d_lock);
out:
if (inode)
spin_unlock(&inode->i_lock);
- rcu_read_unlock();
return false;
}
@@ -1051,12 +1046,13 @@ static void shrink_dentry_list(struct li
{
while (!list_empty(list)) {
struct dentry *dentry, *parent;
- struct inode *inode;
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
+ rcu_read_lock();
if (!shrink_lock_dentry(dentry)) {
bool can_free = false;
+ rcu_read_unlock();
d_shrink_del(dentry);
if (dentry->d_lockref.count < 0)
can_free = dentry->d_flags & DCACHE_MAY_FREE;
@@ -1065,6 +1061,7 @@ static void shrink_dentry_list(struct li
dentry_free(dentry);
continue;
}
+ rcu_read_unlock();
d_shrink_del(dentry);
parent = dentry->d_parent;
__dentry_kill(dentry);
@@ -1077,26 +1074,8 @@ static void shrink_dentry_list(struct li
* fragmentation.
*/
dentry = parent;
- while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
- parent = lock_parent(dentry);
- if (dentry->d_lockref.count != 1) {
- dentry->d_lockref.count--;
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- break;
- }
- inode = dentry->d_inode; /* can't be NULL */
- if (unlikely(!spin_trylock(&inode->i_lock))) {
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- cpu_relax();
- continue;
- }
- __dentry_kill(dentry);
- dentry = parent;
- }
+ while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
+ dentry = dentry_kill(dentry);
}
}

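The replacement loop above boils down to: drop our reference to the parent; if it
was the last one, kill that node and continue with its parent. Below is a minimal
userspace sketch of that shape, assuming a simplified node type with a plain
integer refcount; the names (kill_node, prune_ancestors) are invented for the
illustration and all locking is omitted.

#include <stdio.h>
#include <stdlib.h>

struct node {
        int refcount;
        struct node *parent;
};

/* Free the node and hand back its parent, mirroring the way dentry_kill()
 * returns the parent so the caller can keep walking upwards. */
static struct node *kill_node(struct node *n)
{
        struct node *parent = n->parent;

        free(n);
        return parent;
}

static void prune_ancestors(struct node *n)
{
        /* Stop as soon as an ancestor is still referenced by someone else. */
        while (n && --n->refcount == 0)
                n = kill_node(n);
}

int main(void)
{
        struct node *root = calloc(1, sizeof(*root));
        struct node *mid = calloc(1, sizeof(*mid));
        struct node *child = calloc(1, sizeof(*child));

        root->refcount = 2;                     /* still referenced elsewhere */
        mid->refcount = 1;   mid->parent = root;
        child->refcount = 1; child->parent = mid;

        prune_ancestors(child);                 /* frees child and mid */
        printf("root refcount now %d\n", root->refcount);      /* prints 1 */
        free(root);
        return 0;
}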
View File

@ -1,46 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:56 +0100
Subject: [PATCH 07/29] hrtimer: Cleanup hrtimer_mode enum
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
It's not obvious that the HRTIMER_MODE variants are bit combinations
because all modes are hard-coded constants.
Change it so the bit meanings are clear and use the symbols for creating
modes which combine bits.
While at it get rid of the ugly tail comments.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -28,13 +28,19 @@ struct hrtimer_cpu_base;
/*
* Mode arguments of xxx_hrtimer functions:
+ *
+ * HRTIMER_MODE_ABS - Time value is absolute
+ * HRTIMER_MODE_REL - Time value is relative to now
+ * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered
+ * when starting the timer)
*/
enum hrtimer_mode {
- HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */
- HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */
- HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */
- HRTIMER_MODE_ABS_PINNED = 0x02,
- HRTIMER_MODE_REL_PINNED = 0x03,
+ HRTIMER_MODE_ABS = 0x00,
+ HRTIMER_MODE_REL = 0x01,
+ HRTIMER_MODE_PINNED = 0x02,
+
+ HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
+ HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
};
/*

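The point of the change is easier to see with the bits spelled out: a mode is the
OR of an absolute/relative bit and an optional pinned bit. A minimal standalone
sketch follows, reusing the numeric values from the enum above; hrtimer_init()
itself is not involved and nothing here is part of the patch.

#include <stdio.h>

enum hrtimer_mode {
        HRTIMER_MODE_ABS        = 0x00,
        HRTIMER_MODE_REL        = 0x01,
        HRTIMER_MODE_PINNED     = 0x02,

        HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
        HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
};

int main(void)
{
        enum hrtimer_mode mode = HRTIMER_MODE_REL_PINNED;

        /* Bit 0 selects relative vs. absolute, bit 1 requests pinning. */
        printf("relative=%d pinned=%d\n",
               !!(mode & HRTIMER_MODE_REL), !!(mode & HRTIMER_MODE_PINNED));
        return 0;
}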
View File

@ -1,67 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:39 +0100
Subject: [PATCH 07/10] iommu/amd: Factor out setting the remap table for a
devid
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 2fcc1e8ac4a8514c64f946178fc36c2e30e56a41
Setting the IRQ remap table for a specific devid (or its alias devid)
includes three steps. Those three steps are always repeated each time
this is done.
Introduce a new helper function, move those steps there, and use that
function instead. The compiler can still decide whether it is worth
inlining.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3603,6 +3603,14 @@ static struct irq_remap_table *get_irq_t
return table;
}
+static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
+ struct irq_remap_table *table)
+{
+ irq_lookup_table[devid] = table;
+ set_dte_irq_entry(devid, table);
+ iommu_flush_dte(iommu, devid);
+}
+
static struct irq_remap_table *alloc_irq_table(u16 devid)
{
struct irq_remap_table *table = NULL;
@@ -3623,9 +3631,7 @@ static struct irq_remap_table *alloc_irq
alias = amd_iommu_alias_table[devid];
table = irq_lookup_table[alias];
if (table) {
- irq_lookup_table[devid] = table;
- set_dte_irq_entry(devid, table);
- iommu_flush_dte(iommu, devid);
+ set_remap_table_entry(iommu, devid, table);
goto out;
}
@@ -3652,14 +3658,9 @@ static struct irq_remap_table *alloc_irq
(MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
- irq_lookup_table[devid] = table;
- set_dte_irq_entry(devid, table);
- iommu_flush_dte(iommu, devid);
- if (devid != alias) {
- irq_lookup_table[alias] = table;
- set_dte_irq_entry(alias, table);
- iommu_flush_dte(iommu, alias);
- }
+ set_remap_table_entry(iommu, devid, table);
+ if (devid != alias)
+ set_remap_table_entry(iommu, alias, table);
out:
iommu_completion_wait(iommu);

View File

@ -1,37 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 4 Apr 2018 11:43:58 +0200
Subject: [PATCH] md: raid5: Use irqsave variant of refcount_dec_and_lock()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The irqsave variant of refcount_dec_and_lock() handles irqsave/restore when
taking/releasing the spin lock. With this variant, the explicit call to
local_irq_save() is no longer required.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/raid5.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -409,16 +409,15 @@ void raid5_release_stripe(struct stripe_
md_wakeup_thread(conf->mddev->thread);
return;
slow_path:
- local_irq_save(flags);
/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
- if (refcount_dec_and_lock(&sh->count, &conf->device_lock)) {
+ if (refcount_dec_and_lock_irqsave(&sh->count, &conf->device_lock, &flags)) {
INIT_LIST_HEAD(&list);
hash = sh->hash_lock_index;
do_release_stripe(conf, sh, &list);
spin_unlock(&conf->device_lock);
release_inactive_stripe_list(conf, &list, hash);
+ local_irq_restore(flags);
}
- local_irq_restore(flags);
}
static inline void remove_hash(struct stripe_head *sh)

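The helper adopted here follows the classic dec-and-lock shape: the lock is only
taken when the counter is about to hit zero, and the _irqsave variant saves and
restores interrupt state around that critical section rather than around the whole
function. A rough userspace analogue of the shape, assuming a plain atomic counter
and a pthread mutex (the kernel primitive additionally handles the
irqsave/irqrestore part; struct obj and dec_and_lock are invented names):

#include <stdatomic.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
        atomic_int count;
        pthread_mutex_t lock;
};

/* Return true with the lock held only if this call dropped the last reference. */
static bool dec_and_lock(struct obj *o)
{
        int old = atomic_load(&o->count);

        /* Fast path: as long as we are not the last holder, never take the lock. */
        while (old > 1)
                if (atomic_compare_exchange_weak(&o->count, &old, old - 1))
                        return false;

        /* Slow path: drop the final reference under the lock. */
        pthread_mutex_lock(&o->lock);
        if (atomic_fetch_sub(&o->count, 1) == 1)
                return true;
        pthread_mutex_unlock(&o->lock);
        return false;
}

int main(void)
{
        struct obj o = { .count = 1, .lock = PTHREAD_MUTEX_INITIALIZER };

        if (dec_and_lock(&o)) {
                printf("last reference gone, tearing down under the lock\n");
                pthread_mutex_unlock(&o.lock);
        }
        return 0;
}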
View File

@ -1,134 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:41 -0600
Subject: [PATCH 07/37] tracing: Add timestamp_mode trace file
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Add a new option flag indicating whether or not the ring buffer is in
'absolute timestamp' mode.
Currently this is only set/unset by hist triggers that make use of a
common_timestamp. As such, there's no reason to make this writeable
for users - its purpose is only to allow users to determine
unequivocally whether or not the ring buffer is in that mode. Although
absolute timestamps can coexist with the normal delta timestamps,
timestamps written while absolute mode is in effect take up more space
in the buffer and are not as efficient.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/ftrace.txt | 24 ++++++++++++++++++++
kernel/trace/trace.c | 47 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+)
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -539,6 +539,30 @@ After mounting tracefs you will have acc
See events.txt for more information.
+ timestamp_mode:
+
+ Certain tracers may change the timestamp mode used when
+ logging trace events into the event buffer. Events with
+ different modes can coexist within a buffer but the mode in
+ effect when an event is logged determines which timestamp mode
+ is used for that event. The default timestamp mode is
+ 'delta'.
+
+ Usual timestamp modes for tracing:
+
+ # cat timestamp_mode
+ [delta] absolute
+
+ The timestamp mode with the square brackets around it is the
+ one in effect.
+
+ delta: Default timestamp mode - timestamp is a delta against
+ a per-buffer timestamp.
+
+ absolute: The timestamp is a full timestamp, not a delta
+ against some other value. As such it takes up more
+ space and is less efficient.
+
hwlat_detector:
Directory for the Hardware Latency Detector.
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4520,6 +4520,9 @@ static const char readme_msg[] =
#ifdef CONFIG_X86_64
" x86-tsc: TSC cycle counter\n"
#endif
+ "\n timestamp_mode\t-view the mode used to timestamp events\n"
+ " delta: Delta difference against a buffer-wide timestamp\n"
+ " absolute: Absolute (standalone) timestamp\n"
"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
"\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
" tracing_cpumask\t- Limit which CPUs to trace\n"
@@ -6287,6 +6290,40 @@ static int tracing_clock_open(struct ino
return ret;
}
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
+{
+ struct trace_array *tr = m->private;
+
+ mutex_lock(&trace_types_lock);
+
+ if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
+ seq_puts(m, "delta [absolute]\n");
+ else
+ seq_puts(m, "[delta] absolute\n");
+
+ mutex_unlock(&trace_types_lock);
+
+ return 0;
+}
+
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (trace_array_get(tr))
+ return -ENODEV;
+
+ ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
+}
+
int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
{
int ret = 0;
@@ -6565,6 +6602,13 @@ static const struct file_operations trac
.write = tracing_clock_write,
};
+static const struct file_operations trace_time_stamp_mode_fops = {
+ .open = tracing_time_stamp_mode_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_single_release_tr,
+};
+
#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
.open = tracing_snapshot_open,
@@ -7887,6 +7931,9 @@ init_tracer_tracefs(struct trace_array *
trace_create_file("tracing_on", 0644, d_tracer,
tr, &rb_simple_fops);
+ trace_create_file("timestamp_mode", 0444, d_tracer, tr,
+ &trace_time_stamp_mode_fops);
+
create_trace_options_dir(tr);
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)

View File

@ -1,44 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Fri, 22 Sep 2017 14:58:21 -0500
Subject: [PATCH 07/42] tracing: Clean up hist_field_flags enum
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
As we add more flags, specifying explicit integers for the flag values
becomes more unwieldy and error-prone - switch them over to left-shift
values.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -110,16 +110,16 @@ DEFINE_HIST_FIELD_FN(u8);
#define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE)
enum hist_field_flags {
- HIST_FIELD_FL_HITCOUNT = 1,
- HIST_FIELD_FL_KEY = 2,
- HIST_FIELD_FL_STRING = 4,
- HIST_FIELD_FL_HEX = 8,
- HIST_FIELD_FL_SYM = 16,
- HIST_FIELD_FL_SYM_OFFSET = 32,
- HIST_FIELD_FL_EXECNAME = 64,
- HIST_FIELD_FL_SYSCALL = 128,
- HIST_FIELD_FL_STACKTRACE = 256,
- HIST_FIELD_FL_LOG2 = 512,
+ HIST_FIELD_FL_HITCOUNT = 1 << 0,
+ HIST_FIELD_FL_KEY = 1 << 1,
+ HIST_FIELD_FL_STRING = 1 << 2,
+ HIST_FIELD_FL_HEX = 1 << 3,
+ HIST_FIELD_FL_SYM = 1 << 4,
+ HIST_FIELD_FL_SYM_OFFSET = 1 << 5,
+ HIST_FIELD_FL_EXECNAME = 1 << 6,
+ HIST_FIELD_FL_SYSCALL = 1 << 7,
+ HIST_FIELD_FL_STACKTRACE = 1 << 8,
+ HIST_FIELD_FL_LOG2 = 1 << 9,
};
struct hist_trigger_attrs {

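With the values written as left-shifts, adding the next flag is simply "1 << 10"
and there is no risk of mistyping a power of two; set membership remains a plain
bitwise test. A small standalone sketch of how such flag values are combined and
checked (the flag_names table and print_flags helper are made up for the example,
not part of the patch):

#include <stdio.h>

enum hist_field_flags {
        HIST_FIELD_FL_HITCOUNT  = 1 << 0,
        HIST_FIELD_FL_KEY       = 1 << 1,
        HIST_FIELD_FL_STRING    = 1 << 2,
        HIST_FIELD_FL_HEX       = 1 << 3,
};

static const char *flag_names[] = {
        "hitcount", "key", "string", "hex",
};

/* Print the name of every flag set in @flags by walking the bit positions. */
static void print_flags(unsigned int flags)
{
        for (unsigned int bit = 0; bit < 4; bit++)
                if (flags & (1u << bit))
                        printf("%s ", flag_names[bit]);
        printf("\n");
}

int main(void)
{
        unsigned int flags = HIST_FIELD_FL_KEY | HIST_FIELD_FL_HEX;

        print_flags(flags);             /* prints: key hex */
        return 0;
}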
View File

@ -1,33 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Feb 2018 02:47:29 -0500
Subject: [PATCH 08/17] dcache.c: trim includes
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 43986d63b60fd0152d9038ee3f0f9294efa8c983
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 3 ---
1 file changed, 3 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -25,17 +25,14 @@
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/mount.h>
-#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
-#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
-#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include "internal.h"
#include "mount.h"

View File

@ -1,132 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:40 +0100
Subject: [PATCH 08/10] iommu/amd: Drop the lock while allocating new irq remap
table
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 993ca6e063a69a0c65ca42ed449b6bc1b3844151
The irq_remap_table is allocated while the iommu_table_lock is held with
interrupts disabled.
From looking at the call sites, all callers are in the early device
initialisation (apic_bsp_setup(), pci_enable_device(), pci_enable_msi()),
so it makes sense to drop the lock - which also enables interrupts - and
try to allocate that memory with GFP_KERNEL instead of GFP_ATOMIC.
Since the iommu_table_lock is dropped during the allocation, we need to
recheck whether the table exists after the lock has been reacquired. I
*think* it is impossible for the "devid" entry to appear in
irq_lookup_table while the lock is dropped, since the same device can
only be probed once. However, I check for both cases, just to be sure.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 63 ++++++++++++++++++++++++++++++++--------------
1 file changed, 45 insertions(+), 18 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3603,6 +3603,30 @@ static struct irq_remap_table *get_irq_t
return table;
}
+static struct irq_remap_table *__alloc_irq_table(void)
+{
+ struct irq_remap_table *table;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+ if (!table->table) {
+ kfree(table);
+ return NULL;
+ }
+ raw_spin_lock_init(&table->lock);
+
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ memset(table->table, 0,
+ MAX_IRQS_PER_TABLE * sizeof(u32));
+ else
+ memset(table->table, 0,
+ (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+ return table;
+}
+
static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
@@ -3614,6 +3638,7 @@ static void set_remap_table_entry(struct
static struct irq_remap_table *alloc_irq_table(u16 devid)
{
struct irq_remap_table *table = NULL;
+ struct irq_remap_table *new_table = NULL;
struct amd_iommu *iommu;
unsigned long flags;
u16 alias;
@@ -3632,42 +3657,44 @@ static struct irq_remap_table *alloc_irq
table = irq_lookup_table[alias];
if (table) {
set_remap_table_entry(iommu, devid, table);
- goto out;
+ goto out_wait;
}
+ spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- table = kzalloc(sizeof(*table), GFP_ATOMIC);
- if (!table)
- goto out_unlock;
+ new_table = __alloc_irq_table();
+ if (!new_table)
+ return NULL;
- /* Initialize table spin-lock */
- raw_spin_lock_init(&table->lock);
+ spin_lock_irqsave(&iommu_table_lock, flags);
- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
- if (!table->table) {
- kfree(table);
- table = NULL;
+ table = irq_lookup_table[devid];
+ if (table)
goto out_unlock;
- }
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- memset(table->table, 0,
- MAX_IRQS_PER_TABLE * sizeof(u32));
- else
- memset(table->table, 0,
- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+ table = irq_lookup_table[alias];
+ if (table) {
+ set_remap_table_entry(iommu, devid, table);
+ goto out_wait;
+ }
+ table = new_table;
+ new_table = NULL;
set_remap_table_entry(iommu, devid, table);
if (devid != alias)
set_remap_table_entry(iommu, alias, table);
-out:
+out_wait:
iommu_completion_wait(iommu);
out_unlock:
spin_unlock_irqrestore(&iommu_table_lock, flags);
+ if (new_table) {
+ kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+ kfree(new_table);
+ }
return table;
}

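The change follows a common pattern: drop the lock before a sleeping GFP_KERNEL
allocation, retake it, recheck whether somebody else installed an entry in the
meantime, and free the now-unneeded allocation if the race was lost. Below is a
minimal userspace sketch of that pattern, using a pthread mutex and malloc() in
place of the iommu lock and kmem caches; all names are illustrative only.

#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *lookup_table[16];

static void *get_or_alloc(unsigned int id)
{
        void *new_entry = NULL;
        void *entry;

        pthread_mutex_lock(&table_lock);
        entry = lookup_table[id];
        if (entry)
                goto out;

        /* Drop the lock for the (possibly slow or sleeping) allocation. */
        pthread_mutex_unlock(&table_lock);
        new_entry = malloc(64);
        if (!new_entry)
                return NULL;
        pthread_mutex_lock(&table_lock);

        /* Recheck: somebody may have installed an entry while we were unlocked. */
        entry = lookup_table[id];
        if (entry)
                goto out;

        entry = new_entry;
        new_entry = NULL;
        lookup_table[id] = entry;
out:
        pthread_mutex_unlock(&table_lock);
        free(new_entry);        /* non-NULL only if we lost the race */
        return entry;
}

int main(void)
{
        printf("entry = %p\n", get_or_alloc(3));
        return 0;
}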
View File

@ -1,32 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 4 Apr 2018 11:43:59 +0200
Subject: [PATCH] md: raid5: Do not disable irq on
release_inactive_stripe_list() call
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
There is no need to invoke release_inactive_stripe_list() with interrupts
disabled. All call sites, except raid5_release_stripe(), unlock
->device_lock and enable interrupts before invoking the function.
Make it consistent.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/raid5.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -414,9 +414,8 @@ void raid5_release_stripe(struct stripe_
INIT_LIST_HEAD(&list);
hash = sh->hash_lock_index;
do_release_stripe(conf, sh, &list);
- spin_unlock(&conf->device_lock);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
release_inactive_stripe_list(conf, &list, hash);
- local_irq_restore(flags);
}
}

View File

@ -1,175 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Fri, 22 Sep 2017 14:58:22 -0500
Subject: [PATCH 08/42] tracing: Add hist_field_name() accessor
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
In preparation for hist_fields that won't be strictly based on
trace_event_fields, add a new hist_field_name() accessor to allow that
flexibility and update associated users.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 67 ++++++++++++++++++++++++++-------------
1 file changed, 45 insertions(+), 22 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -146,6 +146,23 @@ struct hist_trigger_data {
struct tracing_map *map;
};
+static const char *hist_field_name(struct hist_field *field,
+ unsigned int level)
+{
+ const char *field_name = "";
+
+ if (level > 1)
+ return field_name;
+
+ if (field->field)
+ field_name = field->field->name;
+
+ if (field_name == NULL)
+ field_name = "";
+
+ return field_name;
+}
+
static hist_field_fn_t select_value_fn(int field_size, int field_is_signed)
{
hist_field_fn_t fn = NULL;
@@ -653,7 +670,6 @@ static int is_descending(const char *str
static int create_sort_keys(struct hist_trigger_data *hist_data)
{
char *fields_str = hist_data->attrs->sort_key_str;
- struct ftrace_event_field *field = NULL;
struct tracing_map_sort_key *sort_key;
int descending, ret = 0;
unsigned int i, j;
@@ -670,7 +686,9 @@ static int create_sort_keys(struct hist_
}
for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) {
+ struct hist_field *hist_field;
char *field_str, *field_name;
+ const char *test_name;
sort_key = &hist_data->sort_keys[i];
@@ -703,8 +721,10 @@ static int create_sort_keys(struct hist_
}
for (j = 1; j < hist_data->n_fields; j++) {
- field = hist_data->fields[j]->field;
- if (field && (strcmp(field_name, field->name) == 0)) {
+ hist_field = hist_data->fields[j];
+ test_name = hist_field_name(hist_field, 0);
+
+ if (strcmp(field_name, test_name) == 0) {
sort_key->field_idx = j;
descending = is_descending(field_str);
if (descending < 0) {
@@ -952,6 +972,7 @@ hist_trigger_entry_print(struct seq_file
struct hist_field *key_field;
char str[KSYM_SYMBOL_LEN];
bool multiline = false;
+ const char *field_name;
unsigned int i;
u64 uval;
@@ -963,26 +984,27 @@ hist_trigger_entry_print(struct seq_file
if (i > hist_data->n_vals)
seq_puts(m, ", ");
+ field_name = hist_field_name(key_field, 0);
+
if (key_field->flags & HIST_FIELD_FL_HEX) {
uval = *(u64 *)(key + key_field->offset);
- seq_printf(m, "%s: %llx",
- key_field->field->name, uval);
+ seq_printf(m, "%s: %llx", field_name, uval);
} else if (key_field->flags & HIST_FIELD_FL_SYM) {
uval = *(u64 *)(key + key_field->offset);
sprint_symbol_no_offset(str, uval);
- seq_printf(m, "%s: [%llx] %-45s",
- key_field->field->name, uval, str);
+ seq_printf(m, "%s: [%llx] %-45s", field_name,
+ uval, str);
} else if (key_field->flags & HIST_FIELD_FL_SYM_OFFSET) {
uval = *(u64 *)(key + key_field->offset);
sprint_symbol(str, uval);
- seq_printf(m, "%s: [%llx] %-55s",
- key_field->field->name, uval, str);
+ seq_printf(m, "%s: [%llx] %-55s", field_name,
+ uval, str);
} else if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
char *comm = elt->private_data;
uval = *(u64 *)(key + key_field->offset);
- seq_printf(m, "%s: %-16s[%10llu]",
- key_field->field->name, comm, uval);
+ seq_printf(m, "%s: %-16s[%10llu]", field_name,
+ comm, uval);
} else if (key_field->flags & HIST_FIELD_FL_SYSCALL) {
const char *syscall_name;
@@ -991,8 +1013,8 @@ hist_trigger_entry_print(struct seq_file
if (!syscall_name)
syscall_name = "unknown_syscall";
- seq_printf(m, "%s: %-30s[%3llu]",
- key_field->field->name, syscall_name, uval);
+ seq_printf(m, "%s: %-30s[%3llu]", field_name,
+ syscall_name, uval);
} else if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
seq_puts(m, "stacktrace:\n");
hist_trigger_stacktrace_print(m,
@@ -1000,15 +1022,14 @@ hist_trigger_entry_print(struct seq_file
HIST_STACKTRACE_DEPTH);
multiline = true;
} else if (key_field->flags & HIST_FIELD_FL_LOG2) {
- seq_printf(m, "%s: ~ 2^%-2llu", key_field->field->name,
+ seq_printf(m, "%s: ~ 2^%-2llu", field_name,
*(u64 *)(key + key_field->offset));
} else if (key_field->flags & HIST_FIELD_FL_STRING) {
- seq_printf(m, "%s: %-50s", key_field->field->name,
+ seq_printf(m, "%s: %-50s", field_name,
(char *)(key + key_field->offset));
} else {
uval = *(u64 *)(key + key_field->offset);
- seq_printf(m, "%s: %10llu", key_field->field->name,
- uval);
+ seq_printf(m, "%s: %10llu", field_name, uval);
}
}
@@ -1021,13 +1042,13 @@ hist_trigger_entry_print(struct seq_file
tracing_map_read_sum(elt, HITCOUNT_IDX));
for (i = 1; i < hist_data->n_vals; i++) {
+ field_name = hist_field_name(hist_data->fields[i], 0);
+
if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
- seq_printf(m, " %s: %10llx",
- hist_data->fields[i]->field->name,
+ seq_printf(m, " %s: %10llx", field_name,
tracing_map_read_sum(elt, i));
} else {
- seq_printf(m, " %s: %10llu",
- hist_data->fields[i]->field->name,
+ seq_printf(m, " %s: %10llu", field_name,
tracing_map_read_sum(elt, i));
}
}
@@ -1142,7 +1163,9 @@ static const char *get_hist_field_flags(
static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
{
- seq_printf(m, "%s", hist_field->field->name);
+ const char *field_name = hist_field_name(hist_field, 0);
+
+ seq_printf(m, "%s", field_name);
if (hist_field->flags) {
const char *flags_str = get_hist_field_flags(hist_field);

View File

@ -1,299 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:42 -0600
Subject: [PATCH 08/37] tracing: Give event triggers access to
ring_buffer_event
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The ring_buffer event can provide a timestamp that may be useful to
various triggers - pass it into the handlers for that purpose.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/trace_events.h | 14 ++++++----
kernel/trace/trace.h | 9 +++---
kernel/trace/trace_events_hist.c | 11 +++++---
kernel/trace/trace_events_trigger.c | 47 ++++++++++++++++++++++--------------
4 files changed, 49 insertions(+), 32 deletions(-)
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -402,11 +402,13 @@ enum event_trigger_type {
extern int filter_match_preds(struct event_filter *filter, void *rec);
-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
- void *rec);
-extern void event_triggers_post_call(struct trace_event_file *file,
- enum event_trigger_type tt,
- void *rec);
+extern enum event_trigger_type
+event_triggers_call(struct trace_event_file *file, void *rec,
+ struct ring_buffer_event *event);
+extern void
+event_triggers_post_call(struct trace_event_file *file,
+ enum event_trigger_type tt,
+ void *rec, struct ring_buffer_event *event);
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
@@ -426,7 +428,7 @@ trace_trigger_soft_disabled(struct trace
if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
- event_triggers_call(file, NULL);
+ event_triggers_call(file, NULL, NULL);
if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
return true;
if (eflags & EVENT_FILE_FL_PID_FILTER)
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1296,7 +1296,7 @@ static inline bool
unsigned long eflags = file->flags;
if (eflags & EVENT_FILE_FL_TRIGGER_COND)
- *tt = event_triggers_call(file, entry);
+ *tt = event_triggers_call(file, entry, event);
if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
(unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
@@ -1333,7 +1333,7 @@ event_trigger_unlock_commit(struct trace
trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
if (tt)
- event_triggers_post_call(file, tt, entry);
+ event_triggers_post_call(file, tt, entry, event);
}
/**
@@ -1366,7 +1366,7 @@ event_trigger_unlock_commit_regs(struct
irq_flags, pc, regs);
if (tt)
- event_triggers_post_call(file, tt, entry);
+ event_triggers_post_call(file, tt, entry, event);
}
#define FILTER_PRED_INVALID ((unsigned short)-1)
@@ -1591,7 +1591,8 @@ extern int register_trigger_hist_enable_
*/
struct event_trigger_ops {
void (*func)(struct event_trigger_data *data,
- void *rec);
+ void *rec,
+ struct ring_buffer_event *rbe);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -909,7 +909,8 @@ static inline void add_to_key(char *comp
memcpy(compound_key + key_field->offset, key, size);
}
-static void event_hist_trigger(struct event_trigger_data *data, void *rec)
+static void event_hist_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
@@ -1660,7 +1661,8 @@ static struct event_command trigger_hist
}
static void
-hist_enable_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
struct event_trigger_data *test;
@@ -1676,7 +1678,8 @@ hist_enable_trigger(struct event_trigger
}
static void
-hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1684,7 +1687,7 @@ hist_enable_count_trigger(struct event_t
if (data->count != -1)
(data->count)--;
- hist_enable_trigger(data, rec);
+ hist_enable_trigger(data, rec, event);
}
static struct event_trigger_ops hist_enable_trigger_ops = {
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trig
* any trigger that should be deferred, ETT_NONE if nothing to defer.
*/
enum event_trigger_type
-event_triggers_call(struct trace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec,
+ struct ring_buffer_event *event)
{
struct event_trigger_data *data;
enum event_trigger_type tt = ETT_NONE;
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_f
if (data->paused)
continue;
if (!rec) {
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_f
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
}
return tt;
}
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
void
event_triggers_post_call(struct trace_event_file *file,
enum event_trigger_type tt,
- void *rec)
+ void *rec, struct ring_buffer_event *event)
{
struct event_trigger_data *data;
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_ev
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -909,7 +910,8 @@ void set_named_trigger_data(struct event
}
static void
-traceon_trigger(struct event_trigger_data *data, void *rec)
+traceon_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (tracing_is_on())
return;
@@ -918,7 +920,8 @@ traceon_trigger(struct event_trigger_dat
}
static void
-traceon_count_trigger(struct event_trigger_data *data, void *rec)
+traceon_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (tracing_is_on())
return;
@@ -933,7 +936,8 @@ traceon_count_trigger(struct event_trigg
}
static void
-traceoff_trigger(struct event_trigger_data *data, void *rec)
+traceoff_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!tracing_is_on())
return;
@@ -942,7 +946,8 @@ traceoff_trigger(struct event_trigger_da
}
static void
-traceoff_count_trigger(struct event_trigger_data *data, void *rec)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!tracing_is_on())
return;
@@ -1039,13 +1044,15 @@ static struct event_command trigger_trac
#ifdef CONFIG_TRACER_SNAPSHOT
static void
-snapshot_trigger(struct event_trigger_data *data, void *rec)
+snapshot_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
tracing_snapshot();
}
static void
-snapshot_count_trigger(struct event_trigger_data *data, void *rec)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1053,7 +1060,7 @@ snapshot_count_trigger(struct event_trig
if (data->count != -1)
(data->count)--;
- snapshot_trigger(data, rec);
+ snapshot_trigger(data, rec, event);
}
static int
@@ -1132,13 +1139,15 @@ static __init int register_trigger_snaps
#define STACK_SKIP 3
static void
-stacktrace_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
trace_dump_stack(STACK_SKIP);
}
static void
-stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1146,7 +1155,7 @@ stacktrace_count_trigger(struct event_tr
if (data->count != -1)
(data->count)--;
- stacktrace_trigger(data, rec);
+ stacktrace_trigger(data, rec, event);
}
static int
@@ -1208,7 +1217,8 @@ static __init void unregister_trigger_tr
}
static void
-event_enable_trigger(struct event_trigger_data *data, void *rec)
+event_enable_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1219,7 +1229,8 @@ event_enable_trigger(struct event_trigge
}
static void
-event_enable_count_trigger(struct event_trigger_data *data, void *rec)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1233,7 +1244,7 @@ event_enable_count_trigger(struct event_
if (data->count != -1)
(data->count)--;
- event_enable_trigger(data, rec);
+ event_enable_trigger(data, rec, event);
}
int event_enable_trigger_print(struct seq_file *m,

View File

@ -1,56 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:57 +0100
Subject: [PATCH 08/29] tracing/hrtimer: Take all clock bases and modes into
account
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
So far, the hrtimer_init tracepoint only took CLOCK_MONOTONIC and
CLOCK_REALTIME into account, as well as HRTIMER_MODE_ABS/REL. The check
for detecting timer mode ABS or REL has not been valid since the
introduction of HRTIMER_MODE_PINNED.
HRTIMER_MODE_PINNED is not evaluated in the hrtimer_init() call, but for
the sake of completeness print all given modes.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/trace/events/timer.h | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -136,6 +136,20 @@ DEFINE_EVENT(timer_class, timer_cancel,
TP_ARGS(timer)
);
+#define decode_clockid(type) \
+ __print_symbolic(type, \
+ { CLOCK_REALTIME, "CLOCK_REALTIME" }, \
+ { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" }, \
+ { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" }, \
+ { CLOCK_TAI, "CLOCK_TAI" })
+
+#define decode_hrtimer_mode(mode) \
+ __print_symbolic(mode, \
+ { HRTIMER_MODE_ABS, "ABS" }, \
+ { HRTIMER_MODE_REL, "REL" }, \
+ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \
+ { HRTIMER_MODE_REL_PINNED, "REL|PINNED" })
+
/**
* hrtimer_init - called when the hrtimer is initialized
* @hrtimer: pointer to struct hrtimer
@@ -162,10 +176,8 @@ TRACE_EVENT(hrtimer_init,
),
TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
- __entry->clockid == CLOCK_REALTIME ?
- "CLOCK_REALTIME" : "CLOCK_MONOTONIC",
- __entry->mode == HRTIMER_MODE_ABS ?
- "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL")
+ decode_clockid(__entry->clockid),
+ decode_hrtimer_mode(__entry->mode))
);
/**

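At trace output time, __print_symbolic() is essentially a value-to-name lookup over
the table it is given, so the new tables let every mode combination print its own
name instead of being forced into one of two strings. A plain-C sketch of that
lookup follows, reusing the hrtimer mode values; decode_mode() and the fallback
formatting are invented for the example.

#include <stdio.h>

struct trace_print_flags {
        unsigned long   mask;
        const char      *name;
};

static const struct trace_print_flags hrtimer_mode_names[] = {
        { 0x00, "ABS" },
        { 0x01, "REL" },
        { 0x02, "ABS|PINNED" },
        { 0x03, "REL|PINNED" },
};

/* Map a raw numeric trace field to a symbolic name, falling back to hex. */
static const char *decode_mode(unsigned long mode)
{
        static char buf[32];

        for (size_t i = 0;
             i < sizeof(hrtimer_mode_names) / sizeof(hrtimer_mode_names[0]); i++)
                if (hrtimer_mode_names[i].mask == mode)
                        return hrtimer_mode_names[i].name;
        snprintf(buf, sizeof(buf), "0x%lx", mode);
        return buf;
}

int main(void)
{
        printf("mode=%s mode=%s\n", decode_mode(0x03), decode_mode(0x07));
        return 0;
}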
View File

@ -1,74 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:41 +0100
Subject: [PATCH 09/10] iommu/amd: Make amd_iommu_devtable_lock a spin_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 2cd1083d79a0a8c223af430ca97884c28a1e2fc0
Before commit 0bb6e243d7fb ("iommu/amd: Support IOMMU_DOMAIN_DMA type
allocation") amd_iommu_devtable_lock had a read_lock() user but now
there are none. In fact, after the mentioned commit we had only
write_lock() user of the lock. Since there is no reason to keep it as
writer lock, change its type to a spin_lock.
I *think* that we might even be able to remove the lock because all its
current user seem to have their own protection.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -80,7 +80,7 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
static DEFINE_SPINLOCK(iommu_table_lock);
@@ -2096,9 +2096,9 @@ static int attach_device(struct device *
}
skip_ats_check:
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
ret = __attach_device(dev_data, domain);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
/*
* We might boot into a crash-kernel here. The crashed kernel
@@ -2148,9 +2148,9 @@ static void detach_device(struct device
domain = dev_data->domain;
/* lock device table */
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(dev_data);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
if (!dev_is_pci(dev))
return;
@@ -2813,7 +2813,7 @@ static void cleanup_domain(struct protec
struct iommu_dev_data *entry;
unsigned long flags;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
@@ -2821,7 +2821,7 @@ static void cleanup_domain(struct protec
__detach_device(entry);
}
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static void protection_domain_free(struct protection_domain *domain)

View File

@ -1,997 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 5 Mar 2018 19:15:50 -0500
Subject: [PATCH 09/17] split d_path() and friends into a separate file
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 7a5cf791a747640adb2a1b5e3838321b26953a23
Those parts of fs/dcache.c are pretty much self-contained.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/Makefile | 2
fs/d_path.c | 470 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/dcache.c | 467 -----------------------------------------------------------
3 files changed, 472 insertions(+), 467 deletions(-)
create mode 100644 fs/d_path.c
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.
ioctl.o readdir.o select.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o splice.o sync.o utimes.o \
+ pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
ifeq ($(CONFIG_BLOCK),y)
--- /dev/null
+++ b/fs/d_path.c
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
+#include "mount.h"
+
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+{
+ *buflen -= namelen;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ *buffer -= namelen;
+ memcpy(*buffer, str, namelen);
+ return 0;
+}
+
+/**
+ * prepend_name - prepend a pathname in front of current buffer pointer
+ * @buffer: buffer pointer
+ * @buflen: allocated length of the buffer
+ * @name: name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be mismatch between length and pointer.
+ * The length cannot be trusted, we need to copy it byte-by-byte until
+ * the length is reached or a null byte is found. It also prepends "/" at
+ * the beginning of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ *
+ * Load acquire is needed to make sure that we see that terminating NUL.
+ */
+static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
+{
+ const char *dname = smp_load_acquire(&name->name); /* ^^^ */
+ u32 dlen = READ_ONCE(name->len);
+ char *p;
+
+ *buflen -= dlen + 1;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ p = *buffer -= dlen + 1;
+ *p++ = '/';
+ while (dlen--) {
+ char c = *dname++;
+ if (!c)
+ break;
+ *p++ = c;
+ }
+ return 0;
+}
+
+/**
+ * prepend_path - Prepend path string to a buffer
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buffer: pointer to the end of the buffer
+ * @buflen: pointer to buffer length
+ *
+ * The function will first try to write out the pathname without taking any
+ * lock other than the RCU read lock to make sure that dentries won't go away.
+ * It only checks the sequence number of the global rename_lock as any change
+ * in the dentry's d_seq will be preceded by changes in the rename_lock
+ * sequence number. If the sequence number had been changed, it will restart
+ * the whole pathname back-tracing sequence again by taking the rename_lock.
+ * In this case, there is no need to take the RCU read lock as the recursive
+ * parent pointer references will keep the dentry chain alive as long as no
+ * rename operation is performed.
+ */
+static int prepend_path(const struct path *path,
+ const struct path *root,
+ char **buffer, int *buflen)
+{
+ struct dentry *dentry;
+ struct vfsmount *vfsmnt;
+ struct mount *mnt;
+ int error = 0;
+ unsigned seq, m_seq = 0;
+ char *bptr;
+ int blen;
+
+ rcu_read_lock();
+restart_mnt:
+ read_seqbegin_or_lock(&mount_lock, &m_seq);
+ seq = 0;
+ rcu_read_lock();
+restart:
+ bptr = *buffer;
+ blen = *buflen;
+ error = 0;
+ dentry = path->dentry;
+ vfsmnt = path->mnt;
+ mnt = real_mount(vfsmnt);
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (dentry != root->dentry || vfsmnt != root->mnt) {
+ struct dentry * parent;
+
+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+ struct mount *parent = READ_ONCE(mnt->mnt_parent);
+ /* Escaped? */
+ if (dentry != vfsmnt->mnt_root) {
+ bptr = *buffer;
+ blen = *buflen;
+ error = 3;
+ break;
+ }
+ /* Global root? */
+ if (mnt != parent) {
+ dentry = READ_ONCE(mnt->mnt_mountpoint);
+ mnt = parent;
+ vfsmnt = &mnt->mnt;
+ continue;
+ }
+ if (!error)
+ error = is_mounted(vfsmnt) ? 1 : 2;
+ break;
+ }
+ parent = dentry->d_parent;
+ prefetch(parent);
+ error = prepend_name(&bptr, &blen, &dentry->d_name);
+ if (error)
+ break;
+
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+
+ if (!(m_seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&mount_lock, m_seq)) {
+ m_seq = 1;
+ goto restart_mnt;
+ }
+ done_seqretry(&mount_lock, m_seq);
+
+ if (error >= 0 && bptr == *buffer) {
+ if (--blen < 0)
+ error = -ENAMETOOLONG;
+ else
+ *--bptr = '/';
+ }
+ *buffer = bptr;
+ *buflen = blen;
+ return error;
+}
+
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name.
+ *
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
+ *
+ * "buflen" should be positive.
+ *
+ * If the path is not reachable from the supplied root, return %NULL.
+ */
+char *__d_path(const struct path *path,
+ const struct path *root,
+ char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, root, &res, &buflen);
+
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error > 0)
+ return NULL;
+ return res;
+}
+
+char *d_absolute_path(const struct path *path,
+ char *buf, int buflen)
+{
+ struct path root = {};
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, &root, &res, &buflen);
+
+ if (error > 1)
+ error = -EINVAL;
+ if (error < 0)
+ return ERR_PTR(error);
+ return res;
+}
+
+/*
+ * same as __d_path but appends "(deleted)" for unlinked files.
+ */
+static int path_with_deleted(const struct path *path,
+ const struct path *root,
+ char **buf, int *buflen)
+{
+ prepend(buf, buflen, "\0", 1);
+ if (d_unlinked(path->dentry)) {
+ int error = prepend(buf, buflen, " (deleted)", 10);
+ if (error)
+ return error;
+ }
+
+ return prepend_path(path, root, buf, buflen);
+}
+
+static int prepend_unreachable(char **buffer, int *buflen)
+{
+ return prepend(buffer, buflen, "(unreachable)", 13);
+}
+
+static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
+ *
+ * "buflen" should be positive.
+ */
+char *d_path(const struct path *path, char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ struct path root;
+ int error;
+
+ /*
+ * We have various synthetic filesystems that never get mounted. On
+ * these filesystems dentries are never used for lookup purposes, and
+ * thus don't need to be hashed. They also don't need a name until a
+ * user wants to identify the object in /proc/pid/fd/. The little hack
+ * below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
+ */
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
+ return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+
+ rcu_read_lock();
+ get_fs_root_rcu(current->fs, &root);
+ error = path_with_deleted(path, &root, &res, &buflen);
+ rcu_read_unlock();
+
+ if (error < 0)
+ res = ERR_PTR(error);
+ return res;
+}
+EXPORT_SYMBOL(d_path);
+
+/*
+ * Helper function for dentry_operations.d_dname() members
+ */
+char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
+ const char *fmt, ...)
+{
+ va_list args;
+ char temp[64];
+ int sz;
+
+ va_start(args, fmt);
+ sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+ va_end(args);
+
+ if (sz > sizeof(temp) || sz > buflen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ buffer += buflen - sz;
+ return memcpy(buffer, temp, sz);
+}
+
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ char *end = buffer + buflen;
+ /* these dentries are never renamed, so d_lock is not needed */
+ if (prepend(&end, &buflen, " (deleted)", 11) ||
+ prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
+ prepend(&end, &buflen, "/", 1))
+ end = ERR_PTR(-ENAMETOOLONG);
+ return end;
+}
+EXPORT_SYMBOL(simple_dname);
+
+/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+static char *__dentry_path(struct dentry *d, char *buf, int buflen)
+{
+ struct dentry *dentry;
+ char *end, *retval;
+ int len, seq = 0;
+ int error = 0;
+
+ if (buflen < 2)
+ goto Elong;
+
+ rcu_read_lock();
+restart:
+ dentry = d;
+ end = buf + buflen;
+ len = buflen;
+ prepend(&end, &len, "\0", 1);
+ /* Get '/' right */
+ retval = end-1;
+ *retval = '/';
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (!IS_ROOT(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+
+ prefetch(parent);
+ error = prepend_name(&end, &len, &dentry->d_name);
+ if (error)
+ break;
+
+ retval = end;
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+ if (error)
+ goto Elong;
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+ return __dentry_path(dentry, buf, buflen);
+}
+EXPORT_SYMBOL(dentry_path_raw);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+ char *p = NULL;
+ char *retval;
+
+ if (d_unlinked(dentry)) {
+ p = buf + buflen;
+ if (prepend(&p, &buflen, "//deleted", 10) != 0)
+ goto Elong;
+ buflen++;
+ }
+ retval = __dentry_path(dentry, buf, buflen);
+ if (!IS_ERR(retval) && p)
+ *p = '/'; /* restore '/' overriden with '\0' */
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
+ struct path *pwd)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ *pwd = fs->pwd;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+ * returns the length of the buffer filled (which
+ * includes the ending '\0' character), or a negative
+ * error value. So libc would do something like
+ *
+ * char *getcwd(char * buf, size_t size)
+ * {
+ * int retval;
+ *
+ * retval = sys_getcwd(buf, size);
+ * if (retval >= 0)
+ * return buf;
+ * errno = -retval;
+ * return NULL;
+ * }
+ */
+SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
+{
+ int error;
+ struct path pwd, root;
+ char *page = __getname();
+
+ if (!page)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
+
+ error = -ENOENT;
+ if (!d_unlinked(pwd.dentry)) {
+ unsigned long len;
+ char *cwd = page + PATH_MAX;
+ int buflen = PATH_MAX;
+
+ prepend(&cwd, &buflen, "\0", 1);
+ error = prepend_path(&pwd, &root, &cwd, &buflen);
+ rcu_read_unlock();
+
+ if (error < 0)
+ goto out;
+
+ /* Unreachable from current root */
+ if (error > 0) {
+ error = prepend_unreachable(&cwd, &buflen);
+ if (error)
+ goto out;
+ }
+
+ error = -ERANGE;
+ len = PATH_MAX + page - cwd;
+ if (len <= size) {
+ error = len;
+ if (copy_to_user(buf, cwd, len))
+ error = -EFAULT;
+ }
+ } else {
+ rcu_read_unlock();
+ }
+
+out:
+ __putname(page);
+ return error;
+}
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,7 @@
* the dcache entry is deleted or garbage collected.
*/
-#include <linux/syscalls.h>
+#include <linux/ratelimit.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
@@ -24,15 +24,11 @@
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/export.h>
-#include <linux/mount.h>
-#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/bootmem.h>
-#include <linux/fs_struct.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
-#include <linux/prefetch.h>
#include <linux/list_lru.h>
#include "internal.h"
#include "mount.h"
@@ -3072,467 +3068,6 @@ struct dentry *d_splice_alias(struct ino
}
EXPORT_SYMBOL(d_splice_alias);
-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
-{
- *buflen -= namelen;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- *buffer -= namelen;
- memcpy(*buffer, str, namelen);
- return 0;
-}
-
-/**
- * prepend_name - prepend a pathname in front of current buffer pointer
- * @buffer: buffer pointer
- * @buflen: allocated length of the buffer
- * @name: name string and length qstr structure
- *
- * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
- * make sure that either the old or the new name pointer and length are
- * fetched. However, there may be mismatch between length and pointer.
- * The length cannot be trusted, we need to copy it byte-by-byte until
- * the length is reached or a null byte is found. It also prepends "/" at
- * the beginning of the name. The sequence number check at the caller will
- * retry it again when a d_move() does happen. So any garbage in the buffer
- * due to mismatched pointer and length will be discarded.
- *
- * Load acquire is needed to make sure that we see that terminating NUL.
- */
-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
-{
- const char *dname = smp_load_acquire(&name->name); /* ^^^ */
- u32 dlen = READ_ONCE(name->len);
- char *p;
-
- *buflen -= dlen + 1;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- p = *buffer -= dlen + 1;
- *p++ = '/';
- while (dlen--) {
- char c = *dname++;
- if (!c)
- break;
- *p++ = c;
- }
- return 0;
-}
-
-/**
- * prepend_path - Prepend path string to a buffer
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buffer: pointer to the end of the buffer
- * @buflen: pointer to buffer length
- *
- * The function will first try to write out the pathname without taking any
- * lock other than the RCU read lock to make sure that dentries won't go away.
- * It only checks the sequence number of the global rename_lock as any change
- * in the dentry's d_seq will be preceded by changes in the rename_lock
- * sequence number. If the sequence number had been changed, it will restart
- * the whole pathname back-tracing sequence again by taking the rename_lock.
- * In this case, there is no need to take the RCU read lock as the recursive
- * parent pointer references will keep the dentry chain alive as long as no
- * rename operation is performed.
- */
-static int prepend_path(const struct path *path,
- const struct path *root,
- char **buffer, int *buflen)
-{
- struct dentry *dentry;
- struct vfsmount *vfsmnt;
- struct mount *mnt;
- int error = 0;
- unsigned seq, m_seq = 0;
- char *bptr;
- int blen;
-
- rcu_read_lock();
-restart_mnt:
- read_seqbegin_or_lock(&mount_lock, &m_seq);
- seq = 0;
- rcu_read_lock();
-restart:
- bptr = *buffer;
- blen = *buflen;
- error = 0;
- dentry = path->dentry;
- vfsmnt = path->mnt;
- mnt = real_mount(vfsmnt);
- read_seqbegin_or_lock(&rename_lock, &seq);
- while (dentry != root->dentry || vfsmnt != root->mnt) {
- struct dentry * parent;
-
- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
- struct mount *parent = READ_ONCE(mnt->mnt_parent);
- /* Escaped? */
- if (dentry != vfsmnt->mnt_root) {
- bptr = *buffer;
- blen = *buflen;
- error = 3;
- break;
- }
- /* Global root? */
- if (mnt != parent) {
- dentry = READ_ONCE(mnt->mnt_mountpoint);
- mnt = parent;
- vfsmnt = &mnt->mnt;
- continue;
- }
- if (!error)
- error = is_mounted(vfsmnt) ? 1 : 2;
- break;
- }
- parent = dentry->d_parent;
- prefetch(parent);
- error = prepend_name(&bptr, &blen, &dentry->d_name);
- if (error)
- break;
-
- dentry = parent;
- }
- if (!(seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
- seq = 1;
- goto restart;
- }
- done_seqretry(&rename_lock, seq);
-
- if (!(m_seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&mount_lock, m_seq)) {
- m_seq = 1;
- goto restart_mnt;
- }
- done_seqretry(&mount_lock, m_seq);
-
- if (error >= 0 && bptr == *buffer) {
- if (--blen < 0)
- error = -ENAMETOOLONG;
- else
- *--bptr = '/';
- }
- *buffer = bptr;
- *buflen = blen;
- return error;
-}
-
-/**
- * __d_path - return the path of a dentry
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name.
- *
- * Returns a pointer into the buffer or an error code if the
- * path was too long.
- *
- * "buflen" should be positive.
- *
- * If the path is not reachable from the supplied root, return %NULL.
- */
-char *__d_path(const struct path *path,
- const struct path *root,
- char *buf, int buflen)
-{
- char *res = buf + buflen;
- int error;
-
- prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, root, &res, &buflen);
-
- if (error < 0)
- return ERR_PTR(error);
- if (error > 0)
- return NULL;
- return res;
-}
-
-char *d_absolute_path(const struct path *path,
- char *buf, int buflen)
-{
- struct path root = {};
- char *res = buf + buflen;
- int error;
-
- prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, &root, &res, &buflen);
-
- if (error > 1)
- error = -EINVAL;
- if (error < 0)
- return ERR_PTR(error);
- return res;
-}
-
-/*
- * same as __d_path but appends "(deleted)" for unlinked files.
- */
-static int path_with_deleted(const struct path *path,
- const struct path *root,
- char **buf, int *buflen)
-{
- prepend(buf, buflen, "\0", 1);
- if (d_unlinked(path->dentry)) {
- int error = prepend(buf, buflen, " (deleted)", 10);
- if (error)
- return error;
- }
-
- return prepend_path(path, root, buf, buflen);
-}
-
-static int prepend_unreachable(char **buffer, int *buflen)
-{
- return prepend(buffer, buflen, "(unreachable)", 13);
-}
-
-static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
-{
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- *root = fs->root;
- } while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/**
- * d_path - return the path of a dentry
- * @path: path to report
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
- * the string " (deleted)" is appended. Note that this is ambiguous.
- *
- * Returns a pointer into the buffer or an error code if the path was
- * too long. Note: Callers should use the returned pointer, not the passed
- * in buffer, to use the name! The implementation often starts at an offset
- * into the buffer, and may leave 0 bytes at the start.
- *
- * "buflen" should be positive.
- */
-char *d_path(const struct path *path, char *buf, int buflen)
-{
- char *res = buf + buflen;
- struct path root;
- int error;
-
- /*
- * We have various synthetic filesystems that never get mounted. On
- * these filesystems dentries are never used for lookup purposes, and
- * thus don't need to be hashed. They also don't need a name until a
- * user wants to identify the object in /proc/pid/fd/. The little hack
- * below allows us to generate a name for these objects on demand:
- *
- * Some pseudo inodes are mountable. When they are mounted
- * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
- * and instead have d_path return the mounted path.
- */
- if (path->dentry->d_op && path->dentry->d_op->d_dname &&
- (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
- return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
-
- rcu_read_lock();
- get_fs_root_rcu(current->fs, &root);
- error = path_with_deleted(path, &root, &res, &buflen);
- rcu_read_unlock();
-
- if (error < 0)
- res = ERR_PTR(error);
- return res;
-}
-EXPORT_SYMBOL(d_path);
-
-/*
- * Helper function for dentry_operations.d_dname() members
- */
-char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
- const char *fmt, ...)
-{
- va_list args;
- char temp[64];
- int sz;
-
- va_start(args, fmt);
- sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
- va_end(args);
-
- if (sz > sizeof(temp) || sz > buflen)
- return ERR_PTR(-ENAMETOOLONG);
-
- buffer += buflen - sz;
- return memcpy(buffer, temp, sz);
-}
-
-char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
-{
- char *end = buffer + buflen;
- /* these dentries are never renamed, so d_lock is not needed */
- if (prepend(&end, &buflen, " (deleted)", 11) ||
- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
- prepend(&end, &buflen, "/", 1))
- end = ERR_PTR(-ENAMETOOLONG);
- return end;
-}
-EXPORT_SYMBOL(simple_dname);
-
-/*
- * Write full pathname from the root of the filesystem into the buffer.
- */
-static char *__dentry_path(struct dentry *d, char *buf, int buflen)
-{
- struct dentry *dentry;
- char *end, *retval;
- int len, seq = 0;
- int error = 0;
-
- if (buflen < 2)
- goto Elong;
-
- rcu_read_lock();
-restart:
- dentry = d;
- end = buf + buflen;
- len = buflen;
- prepend(&end, &len, "\0", 1);
- /* Get '/' right */
- retval = end-1;
- *retval = '/';
- read_seqbegin_or_lock(&rename_lock, &seq);
- while (!IS_ROOT(dentry)) {
- struct dentry *parent = dentry->d_parent;
-
- prefetch(parent);
- error = prepend_name(&end, &len, &dentry->d_name);
- if (error)
- break;
-
- retval = end;
- dentry = parent;
- }
- if (!(seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
- seq = 1;
- goto restart;
- }
- done_seqretry(&rename_lock, seq);
- if (error)
- goto Elong;
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
-}
-
-char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
-{
- return __dentry_path(dentry, buf, buflen);
-}
-EXPORT_SYMBOL(dentry_path_raw);
-
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
-{
- char *p = NULL;
- char *retval;
-
- if (d_unlinked(dentry)) {
- p = buf + buflen;
- if (prepend(&p, &buflen, "//deleted", 10) != 0)
- goto Elong;
- buflen++;
- }
- retval = __dentry_path(dentry, buf, buflen);
- if (!IS_ERR(retval) && p)
- *p = '/'; /* restore '/' overriden with '\0' */
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
-}
-
-static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
- struct path *pwd)
-{
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- *root = fs->root;
- *pwd = fs->pwd;
- } while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/*
- * NOTE! The user-level library version returns a
- * character pointer. The kernel system call just
- * returns the length of the buffer filled (which
- * includes the ending '\0' character), or a negative
- * error value. So libc would do something like
- *
- * char *getcwd(char * buf, size_t size)
- * {
- * int retval;
- *
- * retval = sys_getcwd(buf, size);
- * if (retval >= 0)
- * return buf;
- * errno = -retval;
- * return NULL;
- * }
- */
-SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
-{
- int error;
- struct path pwd, root;
- char *page = __getname();
-
- if (!page)
- return -ENOMEM;
-
- rcu_read_lock();
- get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
-
- error = -ENOENT;
- if (!d_unlinked(pwd.dentry)) {
- unsigned long len;
- char *cwd = page + PATH_MAX;
- int buflen = PATH_MAX;
-
- prepend(&cwd, &buflen, "\0", 1);
- error = prepend_path(&pwd, &root, &cwd, &buflen);
- rcu_read_unlock();
-
- if (error < 0)
- goto out;
-
- /* Unreachable from current root */
- if (error > 0) {
- error = prepend_unreachable(&cwd, &buflen);
- if (error)
- goto out;
- }
-
- error = -ERANGE;
- len = PATH_MAX + page - cwd;
- if (len <= size) {
- error = len;
- if (copy_to_user(buf, cwd, len))
- error = -EFAULT;
- }
- } else {
- rcu_read_unlock();
- }
-
-out:
- __putname(page);
- return error;
-}
-
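The deleted helpers above all build the path string backwards from the end of the supplied buffer, which is why d_path() and getcwd() hand back a pointer into the buffer instead of filling it from the start. A minimal user-space sketch of that prepend pattern, with hypothetical names rather than the kernel API:

#include <stdio.h>
#include <string.h>

/* Prepend namelen bytes of name in front of *bufp, moving the cursor back. */
static int prepend(char **bufp, int *lenp, const char *name, int namelen)
{
	*lenp -= namelen;
	if (*lenp < 0)
		return -1;			/* buffer too small */
	*bufp -= namelen;
	memcpy(*bufp, name, namelen);
	return 0;
}

int main(void)
{
	char buf[32];
	char *p = buf + sizeof(buf);		/* cursor starts at the end */
	int len = sizeof(buf);

	prepend(&p, &len, "\0", 1);		/* terminator first */
	prepend(&p, &len, "/file", 5);		/* then components, leaf to root */
	prepend(&p, &len, "/dir", 4);
	printf("%s\n", p);			/* "/dir/file" - use p, not buf */
	return 0;
}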
/*
* Test whether new_dentry is a subdirectory of old_dentry.
*

View File

@ -1,140 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:43 -0600
Subject: [PATCH 09/37] tracing: Add ring buffer event param to hist field
functions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Some events such as timestamps require access to a ring_buffer_event
struct; add a param so that hist field functions can access that.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -26,7 +26,8 @@
struct hist_field;
-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
+typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event,
+ struct ring_buffer_event *rbe);
#define HIST_FIELD_OPERANDS_MAX 2
@@ -40,24 +41,28 @@ struct hist_field {
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
};
-static u64 hist_field_none(struct hist_field *field, void *event)
+static u64 hist_field_none(struct hist_field *field, void *event,
+ struct ring_buffer_event *rbe)
{
return 0;
}
-static u64 hist_field_counter(struct hist_field *field, void *event)
+static u64 hist_field_counter(struct hist_field *field, void *event,
+ struct ring_buffer_event *rbe)
{
return 1;
}
-static u64 hist_field_string(struct hist_field *hist_field, void *event)
+static u64 hist_field_string(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
char *addr = (char *)(event + hist_field->field->offset);
return (u64)(unsigned long)addr;
}
-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_dynstring(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
u32 str_item = *(u32 *)(event + hist_field->field->offset);
int str_loc = str_item & 0xffff;
@@ -66,24 +71,28 @@ static u64 hist_field_dynstring(struct h
return (u64)(unsigned long)addr;
}
-static u64 hist_field_pstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_pstring(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
char **addr = (char **)(event + hist_field->field->offset);
return (u64)(unsigned long)*addr;
}
-static u64 hist_field_log2(struct hist_field *hist_field, void *event)
+static u64 hist_field_log2(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
struct hist_field *operand = hist_field->operands[0];
- u64 val = operand->fn(operand, event);
+ u64 val = operand->fn(operand, event, rbe);
return (u64) ilog2(roundup_pow_of_two(val));
}
#define DEFINE_HIST_FIELD_FN(type) \
-static u64 hist_field_##type(struct hist_field *hist_field, void *event)\
+ static u64 hist_field_##type(struct hist_field *hist_field, \
+ void *event, \
+ struct ring_buffer_event *rbe) \
{ \
type *addr = (type *)(event + hist_field->field->offset); \
\
@@ -871,8 +880,8 @@ create_hist_data(unsigned int map_bits,
}
static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
- struct tracing_map_elt *elt,
- void *rec)
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe)
{
struct hist_field *hist_field;
unsigned int i;
@@ -880,7 +889,7 @@ static void hist_trigger_elt_update(stru
for_each_hist_val_field(i, hist_data) {
hist_field = hist_data->fields[i];
- hist_val = hist_field->fn(hist_field, rec);
+ hist_val = hist_field->fn(hist_field, rec, rbe);
tracing_map_update_sum(elt, i, hist_val);
}
}
@@ -910,7 +919,7 @@ static inline void add_to_key(char *comp
}
static void event_hist_trigger(struct event_trigger_data *data, void *rec,
- struct ring_buffer_event *event)
+ struct ring_buffer_event *rbe)
{
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
@@ -939,7 +948,7 @@ static void event_hist_trigger(struct ev
key = entries;
} else {
- field_contents = key_field->fn(key_field, rec);
+ field_contents = key_field->fn(key_field, rec, rbe);
if (key_field->flags & HIST_FIELD_FL_STRING) {
key = (void *)(unsigned long)field_contents;
use_compound_key = true;
@@ -956,7 +965,7 @@ static void event_hist_trigger(struct ev
elt = tracing_map_insert(hist_data->map, key);
if (elt)
- hist_trigger_elt_update(hist_data, elt, rec);
+ hist_trigger_elt_update(hist_data, elt, rec, rbe);
}
static void hist_trigger_stacktrace_print(struct seq_file *m,

View File

@ -1,115 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Fri, 22 Sep 2017 14:58:23 -0500
Subject: [PATCH 09/42] tracing: Reimplement log2
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
log2 as currently implemented applies only to u64 trace_event_field
derived fields, and assumes that anything it's applied to is a u64
field.
To prepare for synthetic fields like latencies, log2 should be
applicable to those as well, so take the opportunity now to fix the
current problems as well as expand to more general uses.
log2 should be thought of as a chaining function rather than a field
type. To enable this as well as possible future function
implementations, add a hist_field operand array into the hist_field
definition for this purpose, and make use of it to implement the log2
'function'.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -28,12 +28,16 @@ struct hist_field;
typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
+#define HIST_FIELD_OPERANDS_MAX 2
+
struct hist_field {
struct ftrace_event_field *field;
unsigned long flags;
hist_field_fn_t fn;
unsigned int size;
unsigned int offset;
+ unsigned int is_signed;
+ struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
};
static u64 hist_field_none(struct hist_field *field, void *event)
@@ -71,7 +75,9 @@ static u64 hist_field_pstring(struct his
static u64 hist_field_log2(struct hist_field *hist_field, void *event)
{
- u64 val = *(u64 *)(event + hist_field->field->offset);
+ struct hist_field *operand = hist_field->operands[0];
+
+ u64 val = operand->fn(operand, event);
return (u64) ilog2(roundup_pow_of_two(val));
}
@@ -156,6 +162,8 @@ static const char *hist_field_name(struc
if (field->field)
field_name = field->field->name;
+ else if (field->flags & HIST_FIELD_FL_LOG2)
+ field_name = hist_field_name(field->operands[0], ++level);
if (field_name == NULL)
field_name = "";
@@ -357,8 +365,20 @@ static const struct tracing_map_ops hist
.elt_init = hist_trigger_elt_comm_init,
};
-static void destroy_hist_field(struct hist_field *hist_field)
+static void destroy_hist_field(struct hist_field *hist_field,
+ unsigned int level)
{
+ unsigned int i;
+
+ if (level > 2)
+ return;
+
+ if (!hist_field)
+ return;
+
+ for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
+ destroy_hist_field(hist_field->operands[i], level + 1);
+
kfree(hist_field);
}
@@ -385,7 +405,10 @@ static struct hist_field *create_hist_fi
}
if (flags & HIST_FIELD_FL_LOG2) {
+ unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
hist_field->fn = hist_field_log2;
+ hist_field->operands[0] = create_hist_field(field, fl);
+ hist_field->size = hist_field->operands[0]->size;
goto out;
}
@@ -405,7 +428,7 @@ static struct hist_field *create_hist_fi
hist_field->fn = select_value_fn(field->size,
field->is_signed);
if (!hist_field->fn) {
- destroy_hist_field(hist_field);
+ destroy_hist_field(hist_field, 0);
return NULL;
}
}
@@ -422,7 +445,7 @@ static void destroy_hist_fields(struct h
for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) {
if (hist_data->fields[i]) {
- destroy_hist_field(hist_data->fields[i]);
+ destroy_hist_field(hist_data->fields[i], 0);
hist_data->fields[i] = NULL;
}
}
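The log2 'function' above chains to its operand through the operands[] array instead of reading the event record directly. A simplified user-space model of that chaining (hypothetical struct and helpers; the kernel version also rounds the value up to a power of two first):

#include <stdint.h>
#include <stdio.h>

struct field;
typedef uint64_t (*field_fn)(struct field *f, void *event);

struct field {
	field_fn fn;
	unsigned int offset;		/* used by leaf fields */
	struct field *operands[2];	/* used by chaining fields such as log2 */
};

/* Leaf accessor: read a u64 straight out of the event record. */
static uint64_t field_u64(struct field *f, void *event)
{
	return *(uint64_t *)((char *)event + f->offset);
}

/* Chaining accessor: evaluate the operand first, then take its log2. */
static uint64_t field_log2(struct field *f, void *event)
{
	struct field *op = f->operands[0];
	uint64_t val = op->fn(op, event);

	return val ? 63 - __builtin_clzll(val) : 0;
}

int main(void)
{
	uint64_t record[2] = { 0, 4096 };
	struct field leaf = { .fn = field_u64, .offset = 8 };
	struct field log2_field = { .fn = field_log2, .operands = { &leaf } };

	printf("%llu\n", (unsigned long long)log2_field.fn(&log2_field, record)); /* 12 */
	return 0;
}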

View File

@ -1,114 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:58 +0100
Subject: [PATCH 09/29] tracing/hrtimer: Print hrtimer mode in hrtimer_start
tracepoint
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The hrtimer_start tracepoint lacks the mode information. The mode is
important because consecutive starts can switch from ABS to REL or from
PINNED to non-PINNED.
Add the mode information.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/trace/events/timer.h | 13 ++++++++-----
kernel/time/hrtimer.c | 16 +++++++++-------
2 files changed, 17 insertions(+), 12 deletions(-)
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -186,15 +186,16 @@ TRACE_EVENT(hrtimer_init,
*/
TRACE_EVENT(hrtimer_start,
- TP_PROTO(struct hrtimer *hrtimer),
+ TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode),
- TP_ARGS(hrtimer),
+ TP_ARGS(hrtimer, mode),
TP_STRUCT__entry(
__field( void *, hrtimer )
__field( void *, function )
__field( s64, expires )
__field( s64, softexpires )
+ __field( enum hrtimer_mode, mode )
),
TP_fast_assign(
@@ -202,12 +203,14 @@ TRACE_EVENT(hrtimer_start,
__entry->function = hrtimer->function;
__entry->expires = hrtimer_get_expires(hrtimer);
__entry->softexpires = hrtimer_get_softexpires(hrtimer);
+ __entry->mode = mode;
),
- TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
- __entry->hrtimer, __entry->function,
+ TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu "
+ "mode=%s", __entry->hrtimer, __entry->function,
(unsigned long long) __entry->expires,
- (unsigned long long) __entry->softexpires)
+ (unsigned long long) __entry->softexpires,
+ decode_hrtimer_mode(__entry->mode))
);
/**
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -435,10 +435,11 @@ debug_init(struct hrtimer *timer, clocki
trace_hrtimer_init(timer, clockid, mode);
}
-static inline void debug_activate(struct hrtimer *timer)
+static inline void debug_activate(struct hrtimer *timer,
+ enum hrtimer_mode mode)
{
debug_hrtimer_activate(timer);
- trace_hrtimer_start(timer);
+ trace_hrtimer_start(timer, mode);
}
static inline void debug_deactivate(struct hrtimer *timer)
@@ -830,9 +831,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
* Returns 1 when the new timer is the leftmost timer in the tree.
*/
static int enqueue_hrtimer(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
+ struct hrtimer_clock_base *base,
+ enum hrtimer_mode mode)
{
- debug_activate(timer);
+ debug_activate(timer, mode);
base->cpu_base->active_bases |= 1 << base->index;
@@ -955,7 +957,7 @@ void hrtimer_start_range_ns(struct hrtim
/* Switch the timer base, if necessary: */
new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
- leftmost = enqueue_hrtimer(timer, new_base);
+ leftmost = enqueue_hrtimer(timer, new_base, mode);
if (!leftmost)
goto unlock;
@@ -1224,7 +1226,7 @@ static void __run_hrtimer(struct hrtimer
*/
if (restart != HRTIMER_NORESTART &&
!(timer->state & HRTIMER_STATE_ENQUEUED))
- enqueue_hrtimer(timer, base);
+ enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
/*
* Separate the ->running assignment from the ->state assignment.
@@ -1623,7 +1625,7 @@ static void migrate_hrtimer_list(struct
* sort out already expired timers and reprogram the
* event device.
*/
- enqueue_hrtimer(timer, new_base);
+ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
}
}

View File

@ -1,85 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:12:59 +0100
Subject: [PATCH 10/29] hrtimer: Switch for loop to __ffs() evaluation
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Looping over all clock bases to find active bits is suboptimal if not all
bases are active.
Avoid this by converting it to a __ffs() evaluation. The functionality is
moved into its own helper function and called via a macro, as suggested by
Peter Zijlstra.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -448,6 +448,23 @@ static inline void debug_deactivate(stru
trace_hrtimer_cancel(timer);
}
+static struct hrtimer_clock_base *
+__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
+{
+ unsigned int idx;
+
+ if (!*active)
+ return NULL;
+
+ idx = __ffs(*active);
+ *active &= ~(1U << idx);
+
+ return &cpu_base->clock_base[idx];
+}
+
+#define for_each_active_base(base, cpu_base, active) \
+ while ((base = __next_base((cpu_base), &(active))))
+
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
struct hrtimer *timer)
@@ -459,18 +476,15 @@ static inline void hrtimer_update_next_t
static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
{
- struct hrtimer_clock_base *base = cpu_base->clock_base;
+ struct hrtimer_clock_base *base;
unsigned int active = cpu_base->active_bases;
ktime_t expires, expires_next = KTIME_MAX;
hrtimer_update_next_timer(cpu_base, NULL);
- for (; active; base++, active >>= 1) {
+ for_each_active_base(base, cpu_base, active) {
struct timerqueue_node *next;
struct hrtimer *timer;
- if (!(active & 0x01))
- continue;
-
next = timerqueue_getnext(&base->active);
timer = container_of(next, struct hrtimer, node);
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
@@ -1243,16 +1257,13 @@ static void __run_hrtimer(struct hrtimer
static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
{
- struct hrtimer_clock_base *base = cpu_base->clock_base;
+ struct hrtimer_clock_base *base;
unsigned int active = cpu_base->active_bases;
- for (; active; base++, active >>= 1) {
+ for_each_active_base(base, cpu_base, active) {
struct timerqueue_node *node;
ktime_t basenow;
- if (!(active & 0x01))
- continue;
-
basenow = ktime_add(now, base->offset);
while ((node = timerqueue_getnext(&base->active))) {
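The conversion above replaces a linear scan over all clock bases with a walk of only the set bits in the active mask. In user space the same pattern can be sketched with __builtin_ctz(), which plays the role of __ffs() here (illustrative only):

#include <stdio.h>

struct clock_base { const char *name; };

static struct clock_base bases[8] = {
	{ "MONOTONIC" }, { "REALTIME" }, { "BOOTTIME" }, { "TAI" },
	{ "MONO_SOFT" }, { "REAL_SOFT" }, { "BOOT_SOFT" }, { "TAI_SOFT" },
};

/* Return the lowest active base and clear its bit, like __next_base(). */
static struct clock_base *next_base(unsigned int *active)
{
	unsigned int idx;

	if (!*active)
		return NULL;
	idx = __builtin_ctz(*active);	/* user-space counterpart of __ffs() */
	*active &= ~(1U << idx);
	return &bases[idx];
}

#define for_each_active_base(base, active) \
	while ((base = next_base(&(active))))

int main(void)
{
	unsigned int active = 0x25;	/* bits 0, 2 and 5 set */
	struct clock_base *base;

	for_each_active_base(base, active)
		printf("%s\n", base->name);
	return 0;
}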

View File

@ -1,41 +0,0 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:42 +0100
Subject: [PATCH 10/10] iommu/amd: Return proper error code in
irq_remapping_alloc()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 29d049be9438278c47253a74cf8d0ddf36bd5d68
In the unlikely case when alloc_irq_table() is not able to return a
remap table, "ret" is assigned an error code. Later, the code checks
`index' and, since it is still `-1' from its initialization, the
function properly aborts but returns `-1' instead of the intended
`-ENOMEM'.
In order to correct this, assign -ENOMEM to index.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4110,7 +4110,7 @@ static int irq_remapping_alloc(struct ir
struct amd_ir_data *data = NULL;
struct irq_cfg *cfg;
int i, ret, devid;
- int index = -1;
+ int index;
if (!info)
return -EINVAL;
@@ -4152,7 +4152,7 @@ static int irq_remapping_alloc(struct ir
WARN_ON(table->min_index != 32);
index = info->ioapic_pin;
} else {
- ret = -ENOMEM;
+ index = -ENOMEM;
}
} else {
bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);

View File

@ -1,25 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Mar 2018 12:47:04 -0500
Subject: [PATCH 10/17] take out orphan externs (empty_string/slash_string)
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 903ddaf49329076862d65f7284d825759ff67bd6
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/dcache.h | 2 --
1 file changed, 2 deletions(-)
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -56,9 +56,7 @@ struct qstr {
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
-extern const char empty_string[];
extern const struct qstr empty_name;
-extern const char slash_string[];
extern const struct qstr slash_name;
struct dentry_stat_t {

View File

@ -1,28 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:36 -0600
Subject: [PATCH 10/48] tracing: Add Documentation for log2 modifier
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Add a line for the log2 modifier, to keep it aligned with
tracing/README.
Link: http://lkml.kernel.org/r/a419028bccab155749a4b8702d5b97af75f1578f.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit fcb5b95a2bb931f8e72e2dbd2def67382dd99d42)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 1 +
1 file changed, 1 insertion(+)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -73,6 +73,7 @@
.sym-offset display an address as a symbol and offset
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
+ .log2 display log2 value rather than raw number
Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some

View File

@ -1,109 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:44 -0600
Subject: [PATCH 10/37] tracing: Break out hist trigger assignment parsing
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
This will make it easier to add variables, and makes the parsing code
cleaner regardless.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Rajvi Jingar <rajvi.jingar@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 72 +++++++++++++++++++++++++++------------
1 file changed, 51 insertions(+), 21 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -251,6 +251,51 @@ static void destroy_hist_trigger_attrs(s
kfree(attrs);
}
+static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
+{
+ int ret = 0;
+
+ if ((strncmp(str, "key=", strlen("key=")) == 0) ||
+ (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+ attrs->keys_str = kstrdup(str, GFP_KERNEL);
+ if (!attrs->keys_str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
+ (strncmp(str, "vals=", strlen("vals=")) == 0) ||
+ (strncmp(str, "values=", strlen("values=")) == 0)) {
+ attrs->vals_str = kstrdup(str, GFP_KERNEL);
+ if (!attrs->vals_str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+ attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
+ if (!attrs->sort_key_str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if (strncmp(str, "name=", strlen("name=")) == 0) {
+ attrs->name = kstrdup(str, GFP_KERNEL);
+ if (!attrs->name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if (strncmp(str, "size=", strlen("size=")) == 0) {
+ int map_bits = parse_map_size(str);
+
+ if (map_bits < 0) {
+ ret = map_bits;
+ goto out;
+ }
+ attrs->map_bits = map_bits;
+ } else
+ ret = -EINVAL;
+ out:
+ return ret;
+}
+
static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
{
struct hist_trigger_attrs *attrs;
@@ -263,33 +308,18 @@ static struct hist_trigger_attrs *parse_
while (trigger_str) {
char *str = strsep(&trigger_str, ":");
- if ((strncmp(str, "key=", strlen("key=")) == 0) ||
- (strncmp(str, "keys=", strlen("keys=")) == 0))
- attrs->keys_str = kstrdup(str, GFP_KERNEL);
- else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
- (strncmp(str, "vals=", strlen("vals=")) == 0) ||
- (strncmp(str, "values=", strlen("values=")) == 0))
- attrs->vals_str = kstrdup(str, GFP_KERNEL);
- else if (strncmp(str, "sort=", strlen("sort=")) == 0)
- attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
- else if (strncmp(str, "name=", strlen("name=")) == 0)
- attrs->name = kstrdup(str, GFP_KERNEL);
- else if (strcmp(str, "pause") == 0)
+ if (strchr(str, '=')) {
+ ret = parse_assignment(str, attrs);
+ if (ret)
+ goto free;
+ } else if (strcmp(str, "pause") == 0)
attrs->pause = true;
else if ((strcmp(str, "cont") == 0) ||
(strcmp(str, "continue") == 0))
attrs->cont = true;
else if (strcmp(str, "clear") == 0)
attrs->clear = true;
- else if (strncmp(str, "size=", strlen("size=")) == 0) {
- int map_bits = parse_map_size(str);
-
- if (map_bits < 0) {
- ret = map_bits;
- goto free;
- }
- attrs->map_bits = map_bits;
- } else {
+ else {
ret = -EINVAL;
goto free;
}
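parse_assignment() above handles one "name=value" token from the ':'-separated trigger string. A stand-alone sketch of the same strsep()/strncmp() dispatch, with a simplified, hypothetical attrs struct:

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct attrs {
	char *keys_str;
	char *vals_str;
	unsigned int size;	/* the kernel derives map_bits from this */
};

/* Handle one "name=value" token, in the spirit of parse_assignment(). */
static int parse_assignment(char *str, struct attrs *attrs)
{
	if (strncmp(str, "keys=", strlen("keys=")) == 0)
		attrs->keys_str = strdup(str);
	else if (strncmp(str, "vals=", strlen("vals=")) == 0)
		attrs->vals_str = strdup(str);
	else if (strncmp(str, "size=", strlen("size=")) == 0)
		attrs->size = (unsigned int)atoi(str + strlen("size="));
	else
		return -1;
	return 0;
}

int main(void)
{
	char trigger[] = "keys=pid:vals=hitcount:size=2048:pause";
	char *rest = trigger, *tok;
	struct attrs attrs = { 0 };

	while ((tok = strsep(&rest, ":"))) {
		if (strchr(tok, '='))		/* assignments go to the helper */
			parse_assignment(tok, &attrs);
		else
			printf("flag: %s\n", tok);	/* pause/cont/clear */
	}
	printf("%s / %s / %u\n", attrs.keys_str, attrs.vals_str, attrs.size);
	return 0;
}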

View File

@ -1,76 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Mar 2018 11:00:45 -0500
Subject: [PATCH 11/17] fold lookup_real() into __lookup_hash()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit a03ece5ff2bd7a9abaa0e8ddfe5f79d79e5984c8
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/namei.c | 41 +++++++++++++++++------------------------
1 file changed, 17 insertions(+), 24 deletions(-)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1475,43 +1475,36 @@ static struct dentry *lookup_dcache(cons
}
/*
- * Call i_op->lookup on the dentry. The dentry must be negative and
- * unhashed.
- *
- * dir->d_inode->i_mutex must be held
+ * Parent directory has inode locked exclusive. This is one
+ * and only case when ->lookup() gets called on non in-lookup
+ * dentries - as the matter of fact, this only gets called
+ * when directory is guaranteed to have no in-lookup children
+ * at all.
*/
-static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
-{
- struct dentry *old;
-
- /* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(dir))) {
- dput(dentry);
- return ERR_PTR(-ENOENT);
- }
-
- old = dir->i_op->lookup(dir, dentry, flags);
- if (unlikely(old)) {
- dput(dentry);
- dentry = old;
- }
- return dentry;
-}
-
static struct dentry *__lookup_hash(const struct qstr *name,
struct dentry *base, unsigned int flags)
{
struct dentry *dentry = lookup_dcache(name, base, flags);
+ struct dentry *old;
+ struct inode *dir = base->d_inode;
if (dentry)
return dentry;
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(dir)))
+ return ERR_PTR(-ENOENT);
+
dentry = d_alloc(base, name);
if (unlikely(!dentry))
return ERR_PTR(-ENOMEM);
- return lookup_real(base->d_inode, dentry, flags);
+ old = dir->i_op->lookup(dir, dentry, flags);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ return dentry;
}
static int lookup_fast(struct nameidata *nd,

View File

@ -1,192 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:00 +0100
Subject: [PATCH 11/29] hrtimer: Store running timer in hrtimer_clock_base
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The pointer to the currently running timer is stored in hrtimer_cpu_base
before the base lock is dropped and the callback is invoked.
This results in two levels of indirection, and the upcoming support for
softirq-based hrtimers requires splitting the "running" storage into soft
and hard irq context expiry.
Storing both in the cpu base would require conditionals in all code paths
accessing that information.
It's possible to have a per clock base sequence count and running pointer
without changing the semantics of the related mechanisms because the timer
base pointer cannot be changed while a timer is running the callback.
Unfortunately this makes the clock base larger than 32 bytes on 32bit
kernels. Instead of having huge gaps due to alignment, remove the alignment
and let the compiler pack the cpu base for 32bit. The resulting cache access
patterns are fortunately not really different from the current
behaviour. On 64bit kernels the 64byte alignment stays and the behaviour is
unchanged. This was determined by analyzing the resulting layout and
looking at the number of cache lines involved for the frequently used
clocks.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 20 +++++++++-----------
kernel/time/hrtimer.c | 28 +++++++++++++---------------
2 files changed, 22 insertions(+), 26 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -118,9 +118,9 @@ struct hrtimer_sleeper {
};
#ifdef CONFIG_64BIT
-# define HRTIMER_CLOCK_BASE_ALIGN 64
+# define __hrtimer_clock_base_align ____cacheline_aligned
#else
-# define HRTIMER_CLOCK_BASE_ALIGN 32
+# define __hrtimer_clock_base_align
#endif
/**
@@ -129,18 +129,22 @@ struct hrtimer_sleeper {
* @index: clock type index for per_cpu support when moving a
* timer to a base on another cpu.
* @clockid: clock id for per_cpu support
+ * @seq: seqcount around __run_hrtimer
+ * @running: pointer to the currently running hrtimer
* @active: red black tree root node for the active timers
* @get_time: function to retrieve the current time of the clock
* @offset: offset of this clock to the monotonic base
*/
struct hrtimer_clock_base {
struct hrtimer_cpu_base *cpu_base;
- int index;
+ unsigned int index;
clockid_t clockid;
+ seqcount_t seq;
+ struct hrtimer *running;
struct timerqueue_head active;
ktime_t (*get_time)(void);
ktime_t offset;
-} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
+} __hrtimer_clock_base_align;
enum hrtimer_base_type {
HRTIMER_BASE_MONOTONIC,
@@ -154,8 +158,6 @@ enum hrtimer_base_type {
* struct hrtimer_cpu_base - the per cpu clock bases
* @lock: lock protecting the base and associated clock bases
* and timers
- * @seq: seqcount around __run_hrtimer
- * @running: pointer to the currently running hrtimer
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
* @clock_was_set_seq: Sequence counter of clock was set events
@@ -177,8 +179,6 @@ enum hrtimer_base_type {
*/
struct hrtimer_cpu_base {
raw_spinlock_t lock;
- seqcount_t seq;
- struct hrtimer *running;
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
@@ -198,8 +198,6 @@ struct hrtimer_cpu_base {
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
- BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN);
-
timer->node.expires = time;
timer->_softexpires = time;
}
@@ -424,7 +422,7 @@ static inline int hrtimer_is_queued(stru
*/
static inline int hrtimer_callback_running(struct hrtimer *timer)
{
- return timer->base->cpu_base->running == timer;
+ return timer->base->running == timer;
}
/* Forward a hrtimer so it expires after now: */
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -70,7 +70,6 @@
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
- .seq = SEQCNT_ZERO(hrtimer_bases.seq),
.clock_base =
{
{
@@ -118,7 +117,6 @@ static const int hrtimer_clock_to_base_t
* timer->base->cpu_base
*/
static struct hrtimer_cpu_base migration_cpu_base = {
- .seq = SEQCNT_ZERO(migration_cpu_base),
.clock_base = { { .cpu_base = &migration_cpu_base, }, },
};
@@ -1150,19 +1148,19 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
*/
bool hrtimer_active(const struct hrtimer *timer)
{
- struct hrtimer_cpu_base *cpu_base;
+ struct hrtimer_clock_base *base;
unsigned int seq;
do {
- cpu_base = READ_ONCE(timer->base->cpu_base);
- seq = raw_read_seqcount_begin(&cpu_base->seq);
+ base = READ_ONCE(timer->base);
+ seq = raw_read_seqcount_begin(&base->seq);
if (timer->state != HRTIMER_STATE_INACTIVE ||
- cpu_base->running == timer)
+ base->running == timer)
return true;
- } while (read_seqcount_retry(&cpu_base->seq, seq) ||
- cpu_base != READ_ONCE(timer->base->cpu_base));
+ } while (read_seqcount_retry(&base->seq, seq) ||
+ base != READ_ONCE(timer->base));
return false;
}
@@ -1196,16 +1194,16 @@ static void __run_hrtimer(struct hrtimer
lockdep_assert_held(&cpu_base->lock);
debug_deactivate(timer);
- cpu_base->running = timer;
+ base->running = timer;
/*
* Separate the ->running assignment from the ->state assignment.
*
* As with a regular write barrier, this ensures the read side in
- * hrtimer_active() cannot observe cpu_base->running == NULL &&
+ * hrtimer_active() cannot observe base->running == NULL &&
* timer->state == INACTIVE.
*/
- raw_write_seqcount_barrier(&cpu_base->seq);
+ raw_write_seqcount_barrier(&base->seq);
__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
fn = timer->function;
@@ -1246,13 +1244,13 @@ static void __run_hrtimer(struct hrtimer
* Separate the ->running assignment from the ->state assignment.
*
* As with a regular write barrier, this ensures the read side in
- * hrtimer_active() cannot observe cpu_base->running == NULL &&
+ * hrtimer_active() cannot observe base->running.timer == NULL &&
* timer->state == INACTIVE.
*/
- raw_write_seqcount_barrier(&cpu_base->seq);
+ raw_write_seqcount_barrier(&base->seq);
- WARN_ON_ONCE(cpu_base->running != timer);
- cpu_base->running = NULL;
+ WARN_ON_ONCE(base->running != timer);
+ base->running = NULL;
}
static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
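hrtimer_active() above pairs the per-clock-base seqcount with the running pointer so readers can check timer state locklessly and retry if a writer intervened. A tiny user-space model of that read-retry pattern using C11 atomics (a sketch of the general idea, not the kernel's exact barrier scheme):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct base {
	atomic_uint seq;	/* even = stable, odd = update in progress */
	void *running;		/* timer currently executing its callback */
	int enqueued;		/* non-zero while the timer is queued */
};

/* Lockless "is this timer active?" check with seqcount-style retry. */
static bool timer_active(struct base *b, void *timer)
{
	unsigned int seq;
	bool active;

	do {
		do {		/* wait for a stable (even) sequence */
			seq = atomic_load_explicit(&b->seq, memory_order_acquire);
		} while (seq & 1);

		active = b->enqueued || b->running == timer;

		/* retry if a writer bumped the sequence while we read */
	} while (atomic_load_explicit(&b->seq, memory_order_acquire) != seq);

	return active;
}

int main(void)
{
	int timer;
	struct base b = { .seq = 0, .running = NULL, .enqueued = 1 };

	printf("%d\n", timer_active(&b, &timer));	/* 1: still enqueued */
	return 0;
}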

View File

@ -1,243 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:45 -0600
Subject: [PATCH 11/37] tracing: Add hist trigger timestamp support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Add support for a timestamp event field. This is actually a 'pseudo-'
event field in that it behaves like it's part of the event record, but
is really part of the corresponding ring buffer event.
To make use of the timestamp field, users can specify
"common_timestamp" as a field name for any histogram. Note that this
doesn't make much sense on its own as either a key or a value,
but needs to be supported even so, since follow-on patches will add
support for making use of this field in time deltas. The
common_timestamp 'field' is not a bona fide event field - so you won't
find it in the event description - but rather it's a synthetic field
that can be used like a real field.
Note that the use of this field requires the ring buffer be put into
'absolute timestamp' mode, which saves the complete timestamp for each
event rather than an offset. This mode will be enabled if and only if
a histogram makes use of the "common_timestamp" field.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
[kasan use-after-free fix]
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 94 +++++++++++++++++++++++++++++----------
1 file changed, 71 insertions(+), 23 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -89,6 +89,12 @@ static u64 hist_field_log2(struct hist_f
return (u64) ilog2(roundup_pow_of_two(val));
}
+static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ return ring_buffer_event_time_stamp(rbe);
+}
+
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
void *event, \
@@ -135,6 +141,7 @@ enum hist_field_flags {
HIST_FIELD_FL_SYSCALL = 1 << 7,
HIST_FIELD_FL_STACKTRACE = 1 << 8,
HIST_FIELD_FL_LOG2 = 1 << 9,
+ HIST_FIELD_FL_TIMESTAMP = 1 << 10,
};
struct hist_trigger_attrs {
@@ -159,6 +166,7 @@ struct hist_trigger_data {
struct trace_event_file *event_file;
struct hist_trigger_attrs *attrs;
struct tracing_map *map;
+ bool enable_timestamps;
};
static const char *hist_field_name(struct hist_field *field,
@@ -173,6 +181,8 @@ static const char *hist_field_name(struc
field_name = field->field->name;
else if (field->flags & HIST_FIELD_FL_LOG2)
field_name = hist_field_name(field->operands[0], ++level);
+ else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
+ field_name = "common_timestamp";
if (field_name == NULL)
field_name = "";
@@ -440,6 +450,12 @@ static struct hist_field *create_hist_fi
goto out;
}
+ if (flags & HIST_FIELD_FL_TIMESTAMP) {
+ hist_field->fn = hist_field_timestamp;
+ hist_field->size = sizeof(u64);
+ goto out;
+ }
+
if (WARN_ON_ONCE(!field))
goto out;
@@ -517,10 +533,15 @@ static int create_val_field(struct hist_
}
}
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
+ if (strcmp(field_name, "common_timestamp") == 0) {
+ flags |= HIST_FIELD_FL_TIMESTAMP;
+ hist_data->enable_timestamps = true;
+ } else {
+ field = trace_find_event_field(file->event_call, field_name);
+ if (!field || !field->size) {
+ ret = -EINVAL;
+ goto out;
+ }
}
hist_data->fields[val_idx] = create_hist_field(field, flags);
@@ -615,16 +636,22 @@ static int create_key_field(struct hist_
}
}
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
- }
+ if (strcmp(field_name, "common_timestamp") == 0) {
+ flags |= HIST_FIELD_FL_TIMESTAMP;
+ hist_data->enable_timestamps = true;
+ key_size = sizeof(u64);
+ } else {
+ field = trace_find_event_field(file->event_call, field_name);
+ if (!field || !field->size) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (is_string_field(field))
- key_size = MAX_FILTER_STR_VAL;
- else
- key_size = field->size;
+ if (is_string_field(field))
+ key_size = MAX_FILTER_STR_VAL;
+ else
+ key_size = field->size;
+ }
}
hist_data->fields[key_idx] = create_hist_field(field, flags);
@@ -820,6 +847,9 @@ static int create_tracing_map_fields(str
if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
cmp_fn = tracing_map_cmp_none;
+ else if (!field)
+ cmp_fn = tracing_map_cmp_num(hist_field->size,
+ hist_field->is_signed);
else if (is_string_field(field))
cmp_fn = tracing_map_cmp_string;
else
@@ -1217,7 +1247,11 @@ static void hist_field_print(struct seq_
{
const char *field_name = hist_field_name(hist_field, 0);
- seq_printf(m, "%s", field_name);
+ if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
+ seq_puts(m, "common_timestamp");
+ else if (field_name)
+ seq_printf(m, "%s", field_name);
+
if (hist_field->flags) {
const char *flags_str = get_hist_field_flags(hist_field);
@@ -1268,27 +1302,25 @@ static int event_hist_trigger_print(stru
for (i = 0; i < hist_data->n_sort_keys; i++) {
struct tracing_map_sort_key *sort_key;
+ unsigned int idx;
sort_key = &hist_data->sort_keys[i];
+ idx = sort_key->field_idx;
+
+ if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
+ return -EINVAL;
if (i > 0)
seq_puts(m, ",");
- if (sort_key->field_idx == HITCOUNT_IDX)
+ if (idx == HITCOUNT_IDX)
seq_puts(m, "hitcount");
- else {
- unsigned int idx = sort_key->field_idx;
-
- if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
- return -EINVAL;
-
+ else
hist_field_print(m, hist_data->fields[idx]);
- }
if (sort_key->descending)
seq_puts(m, ".descending");
}
-
seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
if (data->filter_str)
@@ -1456,6 +1488,10 @@ static bool hist_trigger_match(struct ev
return false;
if (key_field->offset != key_field_test->offset)
return false;
+ if (key_field->size != key_field_test->size)
+ return false;
+ if (key_field->is_signed != key_field_test->is_signed)
+ return false;
}
for (i = 0; i < hist_data->n_sort_keys; i++) {
@@ -1538,6 +1574,9 @@ static int hist_register_trigger(char *g
update_cond_flag(file);
+ if (hist_data->enable_timestamps)
+ tracing_set_time_stamp_abs(file->tr, true);
+
if (trace_event_trigger_enable_disable(file, 1) < 0) {
list_del_rcu(&data->list);
update_cond_flag(file);
@@ -1572,17 +1611,26 @@ static void hist_unregister_trigger(char
if (unregistered && test->ops->free)
test->ops->free(test->ops, test);
+
+ if (hist_data->enable_timestamps) {
+ if (unregistered)
+ tracing_set_time_stamp_abs(file->tr, false);
+ }
}
static void hist_unreg_all(struct trace_event_file *file)
{
struct event_trigger_data *test, *n;
+ struct hist_trigger_data *hist_data;
list_for_each_entry_safe(test, n, &file->triggers, list) {
if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ hist_data = test->private_data;
list_del_rcu(&test->list);
trace_event_trigger_enable_disable(file, 0);
update_cond_flag(file);
+ if (hist_data->enable_timestamps)
+ tracing_set_time_stamp_abs(file->tr, false);
if (test->ops->free)
test->ops->free(test->ops, test);
}
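The common_timestamp pseudo-field above takes its value from the ring buffer event rather than from the record, which is why every field accessor now receives both. A minimal sketch of that split, with hypothetical types:

#include <stdint.h>
#include <stdio.h>

struct rb_event { uint64_t time_stamp; };	/* stand-in for ring_buffer_event */

struct field;
typedef uint64_t (*field_fn)(struct field *f, void *rec, struct rb_event *rbe);

struct field { field_fn fn; unsigned int offset; };

/* Ordinary field: data lives in the record, the event is ignored. */
static uint64_t field_u64(struct field *f, void *rec, struct rb_event *rbe)
{
	(void)rbe;
	return *(uint64_t *)((char *)rec + f->offset);
}

/* Pseudo-field: data lives in the ring buffer event, not the record. */
static uint64_t field_timestamp(struct field *f, void *rec, struct rb_event *rbe)
{
	(void)f;
	(void)rec;
	return rbe->time_stamp;
}

int main(void)
{
	uint64_t rec[1] = { 7 };
	struct rb_event rbe = { .time_stamp = 1000000 };
	struct field val = { .fn = field_u64, .offset = 0 };
	struct field ts = { .fn = field_timestamp };

	printf("%llu %llu\n",
	       (unsigned long long)val.fn(&val, rec, &rbe),
	       (unsigned long long)ts.fn(&ts, rec, &rbe));
	return 0;
}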

View File

@ -1,119 +0,0 @@
From: Vedang Patel <vedang.patel@intel.com>
Date: Mon, 15 Jan 2018 20:51:37 -0600
Subject: [PATCH 11/48] tracing: Add support to detect and avoid duplicates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
A duplicate in the tracing_map hash table is when 2 different entries
have the same key and, as a result, the same key_hash. This is possible due
to a race condition in the algorithm. This race condition is inherent to
the algorithm and not a bug. This was fine because, until now, we were
only interested in the sum of all the values related to a particular
key (the duplicates are dealt with in tracing_map_sort_entries()). But,
with the inclusion of variables[1], we are interested in individual
values. So, it will not be clear what value to choose when
there are duplicates. So, the duplicates need to be removed.
The duplicates can occur in the code in the following scenarios:
- A thread is in the process of adding a new element. It has
successfully executed cmpxchg() and inserted the key. But, it is still
not done acquiring the tracing_map_elt struct, populating it and storing
the pointer to the struct in the value field of the tracing_map hash table.
If another thread comes in at this time and wants to add an element with
the same key, it will not see the current element and will add a new one.
- Multiple threads try to execute cmpxchg() at the same time; one of the
threads will succeed and the others will fail. The ones which fail will
go ahead, increment 'idx' and add a new element there, creating a
duplicate.
This patch detects and avoids the first condition by asking the thread
which detects the duplicate to loop one more time. There is also the
possibility of an infinite loop if the thread which started the insert
goes to sleep indefinitely while the thread which detected the duplicate
keeps looping. This is why the thread loops for at most map_size
iterations before returning NULL.
The second scenario is avoided by preventing the threads which failed
cmpxchg() from incrementing idx. This way, they will loop
around and check if the thread which succeeded in executing cmpxchg()
had the same key.
[1] http://lkml.kernel.org/r/cover.1498510759.git.tom.zanussi@linux.intel.com
Link: http://lkml.kernel.org/r/e178e89ec399240331d383bd5913d649713110f4.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit bd0a7ab135d0d0872296c3ae3c4f816a9a4c3dee)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.c | 41 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 36 insertions(+), 5 deletions(-)
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -414,7 +414,9 @@ static inline struct tracing_map_elt *
__tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
{
u32 idx, key_hash, test_key;
+ int dup_try = 0;
struct tracing_map_entry *entry;
+ struct tracing_map_elt *val;
key_hash = jhash(key, map->key_size, 0);
if (key_hash == 0)
@@ -426,11 +428,33 @@ static inline struct tracing_map_elt *
entry = TRACING_MAP_ENTRY(map->map, idx);
test_key = entry->key;
- if (test_key && test_key == key_hash && entry->val &&
- keys_match(key, entry->val->key, map->key_size)) {
- if (!lookup_only)
- atomic64_inc(&map->hits);
- return entry->val;
+ if (test_key && test_key == key_hash) {
+ val = READ_ONCE(entry->val);
+ if (val &&
+ keys_match(key, val->key, map->key_size)) {
+ if (!lookup_only)
+ atomic64_inc(&map->hits);
+ return val;
+ } else if (unlikely(!val)) {
+ /*
+ * The key is present. But, val (pointer to elt
+ * struct) is still NULL. which means some other
+ * thread is in the process of inserting an
+ * element.
+ *
+ * On top of that, it's key_hash is same as the
+ * one being inserted right now. So, it's
+ * possible that the element has the same
+ * key as well.
+ */
+
+ dup_try++;
+ if (dup_try > map->map_size) {
+ atomic64_inc(&map->drops);
+ break;
+ }
+ continue;
+ }
}
if (!test_key) {
@@ -452,6 +476,13 @@ static inline struct tracing_map_elt *
atomic64_inc(&map->hits);
return entry->val;
+ } else {
+ /*
+ * cmpxchg() failed. Loop around once
+ * more to check what key was inserted.
+ */
+ dup_try++;
+ continue;
}
}
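The insert path described above claims a slot by cmpxchg()ing the key hash in, and treats a claimed slot whose value pointer is still NULL as an insert in flight, retrying a bounded number of times instead of creating a duplicate. A reduced user-space model with C11 atomics (not the real tracing_map code; full-table handling is omitted):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MAP_SIZE 16

struct entry {
	_Atomic uint32_t key;	/* 0 means the slot is free */
	_Atomic(void *) val;	/* NULL while the inserter is still filling it in */
};

static struct entry map[MAP_SIZE];

static void *map_insert(uint32_t key_hash, void *elt)
{
	uint32_t idx = key_hash % MAP_SIZE;
	int dup_try = 0;

	for (;;) {
		struct entry *e = &map[idx];
		uint32_t test = atomic_load(&e->key);

		if (test == key_hash) {
			void *val = atomic_load(&e->val);

			if (val)
				return val;	/* existing element: no duplicate */
			/* key claimed, value not published yet: retry, bounded */
			if (++dup_try > MAP_SIZE)
				return NULL;	/* give up, count as a drop */
			continue;
		}
		if (test == 0) {
			uint32_t expected = 0;

			if (atomic_compare_exchange_strong(&e->key, &expected,
							   key_hash)) {
				atomic_store(&e->val, elt);	/* publish */
				return elt;
			}
			dup_try++;	/* lost the race: re-check this slot */
			continue;
		}
		idx = (idx + 1) % MAP_SIZE;	/* other key: probe next slot */
	}
}

int main(void)
{
	int a, b;

	/* Both calls resolve to the same element, no duplicate is created. */
	printf("%d\n", map_insert(42, &a) == map_insert(42, &b));	/* 1 */
	return 0;
}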

View File

@ -1,27 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Mar 2018 11:01:22 -0500
Subject: [PATCH 12/17] debugfs_lookup(): switch to lookup_one_len_unlocked()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit cd1c0c9321999737073dcfc3364e194e02604bce
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/debugfs/inode.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char
if (!parent)
parent = debugfs_mount->mnt_root;
- inode_lock(d_inode(parent));
- dentry = lookup_one_len(name, parent, strlen(name));
- inode_unlock(d_inode(parent));
-
+ dentry = lookup_one_len_unlocked(name, parent, strlen(name));
if (IS_ERR(dentry))
return NULL;
if (!d_really_is_positive(dentry)) {

View File

@ -1,34 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:01 +0100
Subject: [PATCH 12/29] hrtimer: Make room in struct hrtimer_cpu_base
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The upcoming softirq based hrtimers support requires an additional field in
the hrtimer_cpu_base struct, which would grow the struct size beyond a
cache line.
The struct members nr_retries and nr_hangs of hrtimer_cpu_base are solely
used for diagnostic output and have no requirement to be unsigned int.
Make them unsigned short to create room for the new struct member. No
functional change.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -189,8 +189,8 @@ struct hrtimer_cpu_base {
ktime_t expires_next;
struct hrtimer *next_timer;
unsigned int nr_events;
- unsigned int nr_retries;
- unsigned int nr_hangs;
+ unsigned short nr_retries;
+ unsigned short nr_hangs;
unsigned int max_hang_time;
#endif
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];

View File

@ -1,221 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:46 -0600
Subject: [PATCH 12/37] tracing: Add per-element variable support to
tracing_map
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
In order to allow information to be passed between trace events, add
support for per-element variables to tracing_map. This provides a
means for histograms to associate a value or values with an entry when
it's saved or updated, and to retrieve them on a subsequent event occurrence.
Variables can be set using tracing_map_set_var() and read using
tracing_map_read_var(). tracing_map_var_set() returns true or false
depending on whether or not the variable has been set, which is
important for event-matching applications.
tracing_map_read_var_once() reads the variable and resets it to the
'unset' state, implementing read-once variables, which are also
important for event-matching uses.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.c | 108 +++++++++++++++++++++++++++++++++++++++++++++
kernel/trace/tracing_map.h | 11 ++++
2 files changed, 119 insertions(+)
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_
return (u64)atomic64_read(&elt->fields[i].sum);
}
+/**
+ * tracing_map_set_var - Assign a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ * @n: The value to assign
+ *
+ * Assign n to variable i associated with the specified tracing_map_elt
+ * instance. The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.
+ */
+void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n)
+{
+ atomic64_set(&elt->vars[i], n);
+ elt->var_set[i] = true;
+}
+
+/**
+ * tracing_map_var_set - Return whether or not a variable has been set
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Return true if the variable has been set, false otherwise. The
+ * index i is the index returned by the call to tracing_map_add_var()
+ * when the tracing map was set up.
+ */
+bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i)
+{
+ return elt->var_set[i];
+}
+
+/**
+ * tracing_map_read_var - Return the value of a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance. The index i is the index returned by the
+ * call to tracing_map_add_var() when the tracing map was set
+ * up.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i)
+{
+ return (u64)atomic64_read(&elt->vars[i]);
+}
+
+/**
+ * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance, and reset the variable to the 'not set'
+ * state. The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up. The reset
+ * essentially makes the variable a read-once variable if it's only
+ * accessed using this function.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
+{
+ elt->var_set[i] = false;
+ return (u64)atomic64_read(&elt->vars[i]);
+}
+
int tracing_map_cmp_string(void *val_a, void *val_b)
{
char *a = val_a;
@@ -171,6 +238,28 @@ int tracing_map_add_sum_field(struct tra
}
/**
+ * tracing_map_add_var - Add a field describing a tracing_map var
+ * @map: The tracing_map
+ *
+ * Add a var to the map and return the index identifying it in the map
+ * and associated tracing_map_elts. This is the index used for
+ * instance to update a var for a particular tracing_map_elt using
+ * tracing_map_update_var() or reading it via tracing_map_read_var().
+ *
+ * Return: The index identifying the var in the map and associated
+ * tracing_map_elts, or -EINVAL on error.
+ */
+int tracing_map_add_var(struct tracing_map *map)
+{
+ int ret = -EINVAL;
+
+ if (map->n_vars < TRACING_MAP_VARS_MAX)
+ ret = map->n_vars++;
+
+ return ret;
+}
+
+/**
* tracing_map_add_key_field - Add a field describing a tracing_map key
* @map: The tracing_map
* @offset: The offset within the key
@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct
if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64)
atomic64_set(&elt->fields[i].sum, 0);
+ for (i = 0; i < elt->map->n_vars; i++) {
+ atomic64_set(&elt->vars[i], 0);
+ elt->var_set[i] = false;
+ }
+
if (elt->map->ops && elt->map->ops->elt_clear)
elt->map->ops->elt_clear(elt);
}
@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct
if (elt->map->ops && elt->map->ops->elt_free)
elt->map->ops->elt_free(elt);
kfree(elt->fields);
+ kfree(elt->vars);
+ kfree(elt->var_set);
kfree(elt->key);
kfree(elt);
}
@@ -332,6 +428,18 @@ static struct tracing_map_elt *tracing_m
err = -ENOMEM;
goto free;
}
+
+ elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL);
+ if (!elt->vars) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL);
+ if (!elt->var_set) {
+ err = -ENOMEM;
+ goto free;
+ }
tracing_map_elt_init_fields(elt);
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -10,6 +10,7 @@
#define TRACING_MAP_VALS_MAX 3
#define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \
TRACING_MAP_VALS_MAX)
+#define TRACING_MAP_VARS_MAX 16
#define TRACING_MAP_SORT_KEYS_MAX 2
typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
@@ -137,6 +138,8 @@ struct tracing_map_field {
struct tracing_map_elt {
struct tracing_map *map;
struct tracing_map_field *fields;
+ atomic64_t *vars;
+ bool *var_set;
void *key;
void *private_data;
};
@@ -192,6 +195,7 @@ struct tracing_map {
int key_idx[TRACING_MAP_KEYS_MAX];
unsigned int n_keys;
struct tracing_map_sort_key sort_key;
+ unsigned int n_vars;
atomic64_t hits;
atomic64_t drops;
};
@@ -241,6 +245,7 @@ tracing_map_create(unsigned int map_bits
extern int tracing_map_init(struct tracing_map *map);
extern int tracing_map_add_sum_field(struct tracing_map *map);
+extern int tracing_map_add_var(struct tracing_map *map);
extern int tracing_map_add_key_field(struct tracing_map *map,
unsigned int offset,
tracing_map_cmp_fn_t cmp_fn);
@@ -260,7 +265,13 @@ extern int tracing_map_cmp_none(void *va
extern void tracing_map_update_sum(struct tracing_map_elt *elt,
unsigned int i, u64 n);
+extern void tracing_map_set_var(struct tracing_map_elt *elt,
+ unsigned int i, u64 n);
+extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i);
extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
+
extern void tracing_map_set_field_descr(struct tracing_map *map,
unsigned int i,
unsigned int key_offset,
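The variable API added above pairs each value with a 'set' flag so a later event can consume it exactly once. A small stand-alone model of set / test / read-once semantics, with hypothetical names and plain C11 atomics in place of atomic64_t:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VARS_MAX 16

struct elt {
	atomic_uint_fast64_t vars[VARS_MAX];
	bool var_set[VARS_MAX];
};

static void elt_set_var(struct elt *e, unsigned int i, uint64_t n)
{
	atomic_store(&e->vars[i], n);
	e->var_set[i] = true;
}

static bool elt_var_set(struct elt *e, unsigned int i)
{
	return e->var_set[i];
}

/* Read the value and mark it consumed, making it a read-once variable. */
static uint64_t elt_read_var_once(struct elt *e, unsigned int i)
{
	e->var_set[i] = false;
	return (uint64_t)atomic_load(&e->vars[i]);
}

int main(void)
{
	struct elt e = { 0 };

	elt_set_var(&e, 0, 12345);	/* e.g. a timestamp saved at event A */
	if (elt_var_set(&e, 0))
		printf("%llu\n", (unsigned long long)elt_read_var_once(&e, 0));
	printf("still set? %d\n", elt_var_set(&e, 0));	/* 0 */
	return 0;
}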

View File

@ -1,193 +0,0 @@
From: Vedang Patel <vedang.patel@intel.com>
Date: Mon, 15 Jan 2018 20:51:38 -0600
Subject: [PATCH 12/48] tracing: Remove code which merges duplicates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
We now have the logic to detect and remove duplicates in the
tracing_map hash table. The code which merges duplicates in the
histogram is redundant now. So, modify this code just to detect
duplicates. The duplication detection code is still kept to ensure
that any rare race condition which might cause duplicates does not go
unnoticed.
Link: http://lkml.kernel.org/r/55215cf59e2674391bdaf772fdafc4c393352b03.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 3f7f4cc21fc62ff7da7d34b5ca95a69d73a1f764)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 11 -----
kernel/trace/tracing_map.c | 83 ++-------------------------------------
kernel/trace/tracing_map.h | 7 ---
3 files changed, 6 insertions(+), 95 deletions(-)
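The behavioural core of the change condensed into one function: after sorting,
entries with identical keys are only counted and warned about rather than
merged. This restates the detect_dups() added below; the _sketch suffix is the
only invention, and the code would live in kernel/trace/tracing_map.c where
the types and cmp_entries_dup() are defined.

/*
 * Condensed form of the detect-only pass: sort the entries by key,
 * count adjacent equal keys, and warn if any were found instead of
 * merging them into a single element.
 */
static void detect_dups_sketch(struct tracing_map_sort_entry **sort_entries,
			       int n_entries, unsigned int key_size)
{
	unsigned int total_dups = 0;
	void *key;
	int i;

	if (n_entries < 2)
		return;

	sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
	     (int (*)(const void *, const void *))cmp_entries_dup, NULL);

	key = sort_entries[0]->key;
	for (i = 1; i < n_entries; i++) {
		if (!memcmp(sort_entries[i]->key, key, key_size)) {
			total_dups++;
			continue;
		}
		key = sort_entries[i]->key;
	}

	WARN_ONCE(total_dups > 0, "Duplicates detected: %d\n", total_dups);
}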
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -340,16 +340,6 @@ static int hist_trigger_elt_comm_alloc(s
return 0;
}
-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to,
- struct tracing_map_elt *from)
-{
- char *comm_from = from->private_data;
- char *comm_to = to->private_data;
-
- if (comm_from)
- memcpy(comm_to, comm_from, TASK_COMM_LEN + 1);
-}
-
static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
{
char *comm = elt->private_data;
@@ -360,7 +350,6 @@ static void hist_trigger_elt_comm_init(s
static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
.elt_alloc = hist_trigger_elt_comm_alloc,
- .elt_copy = hist_trigger_elt_comm_copy,
.elt_free = hist_trigger_elt_comm_free,
.elt_init = hist_trigger_elt_comm_init,
};
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -847,67 +847,15 @@ create_sort_entry(void *key, struct trac
return sort_entry;
}
-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt)
-{
- struct tracing_map_elt *dup_elt;
- unsigned int i;
-
- dup_elt = tracing_map_elt_alloc(elt->map);
- if (IS_ERR(dup_elt))
- return NULL;
-
- if (elt->map->ops && elt->map->ops->elt_copy)
- elt->map->ops->elt_copy(dup_elt, elt);
-
- dup_elt->private_data = elt->private_data;
- memcpy(dup_elt->key, elt->key, elt->map->key_size);
-
- for (i = 0; i < elt->map->n_fields; i++) {
- atomic64_set(&dup_elt->fields[i].sum,
- atomic64_read(&elt->fields[i].sum));
- dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn;
- }
-
- return dup_elt;
-}
-
-static int merge_dup(struct tracing_map_sort_entry **sort_entries,
- unsigned int target, unsigned int dup)
-{
- struct tracing_map_elt *target_elt, *elt;
- bool first_dup = (target - dup) == 1;
- int i;
-
- if (first_dup) {
- elt = sort_entries[target]->elt;
- target_elt = copy_elt(elt);
- if (!target_elt)
- return -ENOMEM;
- sort_entries[target]->elt = target_elt;
- sort_entries[target]->elt_copied = true;
- } else
- target_elt = sort_entries[target]->elt;
-
- elt = sort_entries[dup]->elt;
-
- for (i = 0; i < elt->map->n_fields; i++)
- atomic64_add(atomic64_read(&elt->fields[i].sum),
- &target_elt->fields[i].sum);
-
- sort_entries[dup]->dup = true;
-
- return 0;
-}
-
-static int merge_dups(struct tracing_map_sort_entry **sort_entries,
+static void detect_dups(struct tracing_map_sort_entry **sort_entries,
int n_entries, unsigned int key_size)
{
unsigned int dups = 0, total_dups = 0;
- int err, i, j;
+ int i;
void *key;
if (n_entries < 2)
- return total_dups;
+ return;
sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
(int (*)(const void *, const void *))cmp_entries_dup, NULL);
@@ -916,30 +864,14 @@ static int merge_dups(struct tracing_map
for (i = 1; i < n_entries; i++) {
if (!memcmp(sort_entries[i]->key, key, key_size)) {
dups++; total_dups++;
- err = merge_dup(sort_entries, i - dups, i);
- if (err)
- return err;
continue;
}
key = sort_entries[i]->key;
dups = 0;
}
- if (!total_dups)
- return total_dups;
-
- for (i = 0, j = 0; i < n_entries; i++) {
- if (!sort_entries[i]->dup) {
- sort_entries[j] = sort_entries[i];
- if (j++ != i)
- sort_entries[i] = NULL;
- } else {
- destroy_sort_entry(sort_entries[i]);
- sort_entries[i] = NULL;
- }
- }
-
- return total_dups;
+ WARN_ONCE(total_dups > 0,
+ "Duplicates detected: %d\n", total_dups);
}
static bool is_key(struct tracing_map *map, unsigned int field_idx)
@@ -1065,10 +997,7 @@ int tracing_map_sort_entries(struct trac
return 1;
}
- ret = merge_dups(entries, n_entries, map->key_size);
- if (ret < 0)
- goto free;
- n_entries -= ret;
+ detect_dups(entries, n_entries, map->key_size);
if (is_key(map, sort_keys[0].field_idx))
cmp_entries_fn = cmp_entries_key;
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -215,11 +215,6 @@ struct tracing_map {
* Element allocation occurs before tracing begins, when the
* tracing_map_init() call is made by client code.
*
- * @elt_copy: At certain points in the lifetime of an element, it may
- * need to be copied. The copy should include a copy of the
- * client-allocated data, which can be copied into the 'to'
- * element from the 'from' element.
- *
* @elt_free: When a tracing_map_elt is freed, this function is called
* and allows client-allocated per-element data to be freed.
*
@@ -233,8 +228,6 @@ struct tracing_map {
*/
struct tracing_map_ops {
int (*elt_alloc)(struct tracing_map_elt *elt);
- void (*elt_copy)(struct tracing_map_elt *to,
- struct tracing_map_elt *from);
void (*elt_free)(struct tracing_map_elt *elt);
void (*elt_clear)(struct tracing_map_elt *elt);
void (*elt_init)(struct tracing_map_elt *elt);


@ -1,150 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:02 +0100
Subject: [PATCH 13/29] hrtimer: Reduce conditional code (hres_active)
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
The hrtimer_cpu_base struct has the CONFIG_HIGH_RES_TIMERS conditional
struct member hres_active. All functions related to this member are
conditional as well.
There is no functional change when the hres_active member and its
related functions are made unconditional and the member is set to zero
during initialization.
The conditional code sections can be avoided by adding IS_ENABLED(HIGHRES)
conditionals into common functions, which ensures dead code elimination.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 20 ++++++++------------
kernel/time/hrtimer.c | 31 +++++++++++++++----------------
2 files changed, 23 insertions(+), 28 deletions(-)
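The mechanism relied on here is the usual IS_ENABLED() constant-folding idiom.
A stripped-down illustration follows; every example_* name is invented, only
the pattern matches the patch.

/*
 * The bitfield always exists, the accessor folds to 0 when
 * CONFIG_HIGH_RES_TIMERS is disabled, and the compiler then removes
 * the high-resolution branches in every caller without any #ifdef.
 */
#include <linux/kconfig.h>

struct example_cpu_base {
	unsigned int hres_active : 1;	/* unconditional member */
};

static inline int example_hres_active(struct example_cpu_base *base)
{
	return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? base->hres_active : 0;
}

static void example_reprogram(struct example_cpu_base *base)
{
	if (!example_hres_active(base))
		return;			/* dead code when HIGHRES=n */

	/* ... high resolution only work ... */
}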
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -161,8 +161,8 @@ enum hrtimer_base_type {
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
* @clock_was_set_seq: Sequence counter of clock was set events
- * @in_hrtirq: hrtimer_interrupt() is currently executing
* @hres_active: State of high resolution mode
+ * @in_hrtirq: hrtimer_interrupt() is currently executing
* @hang_detected: The last hrtimer interrupt detected a hang
* @expires_next: absolute time of the next event, is required for remote
* hrtimer enqueue
@@ -182,9 +182,9 @@ struct hrtimer_cpu_base {
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
+ unsigned int hres_active : 1;
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int in_hrtirq : 1,
- hres_active : 1,
hang_detected : 1;
ktime_t expires_next;
struct hrtimer *next_timer;
@@ -266,16 +266,17 @@ static inline ktime_t hrtimer_cb_get_tim
return timer->base->get_time();
}
+static inline int hrtimer_is_hres_active(struct hrtimer *timer)
+{
+ return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
+ timer->base->cpu_base->hres_active : 0;
+}
+
#ifdef CONFIG_HIGH_RES_TIMERS
struct clock_event_device;
extern void hrtimer_interrupt(struct clock_event_device *dev);
-static inline int hrtimer_is_hres_active(struct hrtimer *timer)
-{
- return timer->base->cpu_base->hres_active;
-}
-
/*
* The resolution of the clocks. The resolution value is returned in
* the clock_getres() system call to give application programmers an
@@ -298,11 +299,6 @@ extern unsigned int hrtimer_resolution;
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
-static inline int hrtimer_is_hres_active(struct hrtimer *timer)
-{
- return 0;
-}
-
static inline void clock_was_set_delayed(void) { }
#endif
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -512,6 +512,20 @@ static inline ktime_t hrtimer_update_bas
offs_real, offs_boot, offs_tai);
}
+/*
+ * Is the high resolution mode active ?
+ */
+static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
+{
+ return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
+ cpu_base->hres_active : 0;
+}
+
+static inline int hrtimer_hres_active(void)
+{
+ return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -541,19 +555,6 @@ static inline int hrtimer_is_hres_enable
}
/*
- * Is the high resolution mode active ?
- */
-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
-{
- return cpu_base->hres_active;
-}
-
-static inline int hrtimer_hres_active(void)
-{
- return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
-}
-
-/*
* Reprogram the event source with checking both queues for the
* next event
* Called with interrupts disabled and base->lock held
@@ -662,7 +663,6 @@ static inline void hrtimer_init_hres(str
{
base->expires_next = KTIME_MAX;
base->hang_detected = 0;
- base->hres_active = 0;
base->next_timer = NULL;
}
@@ -722,8 +722,6 @@ void clock_was_set_delayed(void)
#else
-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; }
-static inline int hrtimer_hres_active(void) { return 0; }
static inline int hrtimer_is_hres_enabled(void) { return 0; }
static inline void hrtimer_switch_to_hres(void) { }
static inline void
@@ -1605,6 +1603,7 @@ int hrtimers_prepare_cpu(unsigned int cp
cpu_base->active_bases = 0;
cpu_base->cpu = cpu;
+ cpu_base->hres_active = 0;
hrtimer_init_hres(cpu_base);
return 0;
}


@ -1,45 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 9 Mar 2018 18:06:03 -0500
Subject: [PATCH 13/17] lustre: get rid of pointless casts to struct dentry *
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 5bf1ddf7ee0e23598a620ef9ea2b0f00e804859d
... when feeding const struct dentry * to primitives taking
exactly that.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/staging/lustre/lustre/llite/dcache.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -90,7 +90,7 @@ static int ll_dcompare(const struct dent
d_count(dentry));
/* mountpoint is always valid */
- if (d_mountpoint((struct dentry *)dentry))
+ if (d_mountpoint(dentry))
return 0;
if (d_lustre_invalid(dentry))
@@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentr
LASSERT(de);
CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
- d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping",
+ d_lustre_invalid(de) ? "deleting" : "keeping",
de, de, de->d_parent, d_inode(de),
d_unhashed(de) ? "" : "hashed,",
list_empty(&de->d_subdirs) ? "" : "subdirs");
@@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentr
/* kernel >= 2.6.38 last refcount is decreased after this function. */
LASSERT(d_count(de) == 1);
- if (d_lustre_invalid((struct dentry *)de))
+ if (d_lustre_invalid(de))
return 1;
return 0;
}


@ -1,133 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:39 -0600
Subject: [PATCH 13/48] ring-buffer: Add interface for setting absolute time
stamps
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Define a new function, tracing_set_time_stamp_abs(), which can be used
to enable or disable the use of absolute timestamps rather than time
deltas for a trace array.
Only the interface is added here; a subsequent patch will add the
underlying implementation.
Link: http://lkml.kernel.org/r/ce96119de44c7fe0ee44786d15254e9b493040d3.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 22753475c5232cd6f024746d6a6696a4dd2683ab)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 2 ++
kernel/trace/ring_buffer.c | 11 +++++++++++
kernel/trace/trace.c | 33 ++++++++++++++++++++++++++++++++-
kernel/trace/trace.h | 3 +++
4 files changed, 48 insertions(+), 1 deletion(-)
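A short sketch of how a consumer (the hist trigger code later in this series)
is expected to use the new call; the example_* functions and the surrounding
trigger setup are assumptions, tracing_set_time_stamp_abs() itself is what
this patch adds.

/*
 * Sketch: enable absolute timestamps for a trace array while a
 * consumer needs them, and drop the (refcounted) request when done.
 * 'tr' would come from the trigger's trace_event_file in real code.
 */
#include "trace.h"	/* kernel/trace/trace.h: struct trace_array */

static int example_trigger_attach(struct trace_array *tr)
{
	int ret;

	ret = tracing_set_time_stamp_abs(tr, true);
	if (ret)
		return ret;

	/* ... register whatever reads the absolute stamps ... */
	return 0;
}

static void example_trigger_detach(struct trace_array *tr)
{
	/* balanced with the enable above; the last user switches back to deltas */
	tracing_set_time_stamp_abs(tr, false);
}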
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -178,6 +178,8 @@ void ring_buffer_normalize_time_stamp(st
int cpu, u64 *ts);
void ring_buffer_set_clock(struct ring_buffer *buffer,
u64 (*clock)(void));
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
size_t ring_buffer_page_len(void *page);
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -488,6 +488,7 @@ struct ring_buffer {
u64 (*clock)(void);
struct rb_irq_work irq_work;
+ bool time_stamp_abs;
};
struct ring_buffer_iter {
@@ -1387,6 +1388,16 @@ void ring_buffer_set_clock(struct ring_b
buffer->clock = clock;
}
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
+{
+ buffer->time_stamp_abs = abs;
+}
+
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
+{
+ return buffer->time_stamp_abs;
+}
+
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
static inline unsigned long rb_page_entries(struct buffer_page *bpage)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2269,7 +2269,7 @@ trace_event_buffer_lock_reserve(struct r
*current_rb = trace_file->tr->trace_buffer.buffer;
- if ((trace_file->flags &
+ if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
(EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
@@ -6281,6 +6281,37 @@ static int tracing_clock_open(struct ino
return ret;
}
+
+int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+{
+ int ret = 0;
+
+ mutex_lock(&trace_types_lock);
+
+ if (abs && tr->time_stamp_abs_ref++)
+ goto out;
+
+ if (!abs) {
+ if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (--tr->time_stamp_abs_ref)
+ goto out;
+ }
+
+ ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->max_buffer.buffer)
+ ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
+#endif
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
struct ftrace_buffer_info {
struct trace_iterator iter;
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -273,6 +273,7 @@ struct trace_array {
/* function tracing enabled */
int function_enabled;
#endif
+ int time_stamp_abs_ref;
};
enum {
@@ -286,6 +287,8 @@ extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
extern void trace_array_put(struct trace_array *tr);
+extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.


@ -1,79 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:47 -0600
Subject: [PATCH 13/37] tracing: Add hist_data member to hist_field
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Allow hist_data access via hist_field. Some users of hist_fields
require or will require more access to the associated hist_data.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
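A small sketch of the kind of access this enables. The helper is invented, it
would live in kernel/trace/trace_events_hist.c where these types are defined,
and the ts_in_usecs attribute it reads is only introduced by a later patch in
this series.

/*
 * With ->hist_data available, per-field code can reach trigger-wide
 * state without threading extra parameters through every
 * hist_field_fn_t.  Illustrative only.
 */
static bool example_field_wants_usecs(struct hist_field *hist_field)
{
	struct hist_trigger_data *hist_data = hist_field->hist_data;

	return hist_data->attrs->ts_in_usecs;	/* added by a later patch */
}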
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -39,6 +39,7 @@ struct hist_field {
unsigned int offset;
unsigned int is_signed;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
+ struct hist_trigger_data *hist_data;
};
static u64 hist_field_none(struct hist_field *field, void *event,
@@ -420,7 +421,8 @@ static void destroy_hist_field(struct hi
kfree(hist_field);
}
-static struct hist_field *create_hist_field(struct ftrace_event_field *field,
+static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+ struct ftrace_event_field *field,
unsigned long flags)
{
struct hist_field *hist_field;
@@ -432,6 +434,8 @@ static struct hist_field *create_hist_fi
if (!hist_field)
return NULL;
+ hist_field->hist_data = hist_data;
+
if (flags & HIST_FIELD_FL_HITCOUNT) {
hist_field->fn = hist_field_counter;
goto out;
@@ -445,7 +449,7 @@ static struct hist_field *create_hist_fi
if (flags & HIST_FIELD_FL_LOG2) {
unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
hist_field->fn = hist_field_log2;
- hist_field->operands[0] = create_hist_field(field, fl);
+ hist_field->operands[0] = create_hist_field(hist_data, field, fl);
hist_field->size = hist_field->operands[0]->size;
goto out;
}
@@ -498,7 +502,7 @@ static void destroy_hist_fields(struct h
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
- create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT);
+ create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT);
if (!hist_data->fields[HITCOUNT_IDX])
return -ENOMEM;
@@ -544,7 +548,7 @@ static int create_val_field(struct hist_
}
}
- hist_data->fields[val_idx] = create_hist_field(field, flags);
+ hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags);
if (!hist_data->fields[val_idx]) {
ret = -ENOMEM;
goto out;
@@ -654,7 +658,7 @@ static int create_key_field(struct hist_
}
}
- hist_data->fields[key_idx] = create_hist_field(field, flags);
+ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags);
if (!hist_data->fields[key_idx]) {
ret = -ENOMEM;
goto out;


@ -1,36 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:03 +0100
Subject: [PATCH 14/29] hrtimer: Use accessor functions instead of direct access
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
__hrtimer_hres_active() is now available unconditionally. Replace the
direct access to hrtimer_cpu_base.hres_active.
No functional change.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/time/hrtimer.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -564,7 +564,7 @@ hrtimer_force_reprogram(struct hrtimer_c
{
ktime_t expires_next;
- if (!cpu_base->hres_active)
+ if (!__hrtimer_hres_active(cpu_base))
return;
expires_next = __hrtimer_get_next_event(cpu_base);
@@ -673,7 +673,7 @@ static void retrigger_next_event(void *a
{
struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
- if (!base->hres_active)
+ if (!__hrtimer_hres_active(base))
return;
raw_spin_lock(&base->lock);


@ -1,27 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 Mar 2018 16:40:33 -0500
Subject: [PATCH 14/17] oprofilefs: don't oops on allocation failure
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit a7498968338da9b928f5d8054acc8be6ed2bc14c
... just short-circuit the creation of potential children
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/oprofile/oprofilefs.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,6 +138,9 @@ static int __oprofilefs_create_file(stru
struct dentry *dentry;
struct inode *inode;
+ if (!root)
+ return -ENOMEM;
+
inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry) {


@ -1,323 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:40 -0600
Subject: [PATCH 14/48] ring-buffer: Redefine the unimplemented
RINGBUF_TYPE_TIME_STAMP
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
RINGBUF_TYPE_TIME_STAMP is defined but not used, and from what I can
gather was reserved for something like an absolute timestamp feature
for the ring buffer, if not a complete replacement of the current
time_delta scheme.
This code redefines RINGBUF_TYPE_TIME_STAMP to implement absolute time
stamps. Another way to look at it is that it essentially forces
extended time_deltas for all events.
The motivation for doing this is to enable time_deltas that aren't
dependent on previous events in the ring buffer, making it feasible to
use the ring_buffer_event timestamps in a more random-access way, for
purposes other than serial event printing.
To set/reset this mode, use tracing_set_time_stamp_abs() from the
previous interface patch.
Link: http://lkml.kernel.org/r/477b362dba1ce7fab9889a1a8e885a62c472f041.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 58c0bd803060b0c0c9de8751382a7af5f507d74d)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 12 ++---
kernel/trace/ring_buffer.c | 104 ++++++++++++++++++++++++++++++++------------
2 files changed, 83 insertions(+), 33 deletions(-)
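The encoding described above, shown as an isolated pair of helpers. TS_SHIFT
is 27 in kernel/trace/ring_buffer.c, and the decode side matches the
ring_buffer_event_time_stamp() added below; the EXAMPLE_* and example_* names
are inventions.

/*
 * The TIME_STAMP layout in isolation: the low 27 bits of the absolute
 * timestamp live in event->time_delta, the remaining upper bits in
 * event->array[0], mirroring the TIME_EXTEND layout.
 */
#include <linux/ring_buffer.h>

#define EXAMPLE_TS_SHIFT	27
#define EXAMPLE_TS_MASK		((1ULL << EXAMPLE_TS_SHIFT) - 1)

static void example_encode_abs_ts(struct ring_buffer_event *event, u64 ts)
{
	event->time_delta = ts & EXAMPLE_TS_MASK;	/* bottom 27 bits  */
	event->array[0] = ts >> EXAMPLE_TS_SHIFT;	/* upper bits      */
}

static u64 example_decode_abs_ts(struct ring_buffer_event *event)
{
	return ((u64)event->array[0] << EXAMPLE_TS_SHIFT) + event->time_delta;
}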
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -34,10 +34,12 @@ struct ring_buffer_event {
* array[0] = time delta (28 .. 59)
* size = 8 bytes
*
- * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
- * array[0] = tv_nsec
- * array[1..2] = tv_sec
- * size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp
+ * Same format as TIME_EXTEND except that the
+ * value is an absolute timestamp, not a delta
+ * event.time_delta contains bottom 27 bits
+ * array[0] = top (28 .. 59) bits
+ * size = 8 bytes
*
* <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
* Data record
@@ -54,12 +56,12 @@ enum ring_buffer_type {
RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
RINGBUF_TYPE_PADDING,
RINGBUF_TYPE_TIME_EXTEND,
- /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
RINGBUF_TYPE_TIME_STAMP,
};
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
/*
* ring_buffer_discard_commit will remove an event that has not
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -41,6 +41,8 @@ int ring_buffer_print_entry_header(struc
RINGBUF_TYPE_PADDING);
trace_seq_printf(s, "\ttime_extend : type == %d\n",
RINGBUF_TYPE_TIME_EXTEND);
+ trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+ RINGBUF_TYPE_TIME_STAMP);
trace_seq_printf(s, "\tdata max type_len == %d\n",
RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
@@ -140,12 +142,15 @@ int ring_buffer_print_entry_header(struc
enum {
RB_LEN_TIME_EXTEND = 8,
- RB_LEN_TIME_STAMP = 16,
+ RB_LEN_TIME_STAMP = 8,
};
#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+#define extended_time(event) \
+ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -209,7 +214,7 @@ rb_event_ts_length(struct ring_buffer_ev
{
unsigned len = 0;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ if (extended_time(event)) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
@@ -231,7 +236,7 @@ unsigned ring_buffer_event_length(struct
{
unsigned length;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
length = rb_event_length(event);
@@ -248,7 +253,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_leng
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
@@ -275,6 +280,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event. This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+ u64 ts;
+
+ ts = event->array[0];
+ ts <<= TS_SHIFT;
+ ts += event->time_delta;
+
+ return ts;
+}
+
/* Flag when events were overwritten */
#define RB_MISSED_EVENTS (1 << 31)
/* Missed count stored at end */
@@ -2222,12 +2248,15 @@ rb_move_tail(struct ring_buffer_per_cpu
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
{
- event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+ if (abs)
+ event->type_len = RINGBUF_TYPE_TIME_STAMP;
+ else
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
- /* Not the first event on the page? */
- if (rb_event_index(event)) {
+ /* Not the first event on the page, or not delta? */
+ if (abs || rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
@@ -2270,7 +2299,9 @@ rb_update_event(struct ring_buffer_per_c
* add it to the start of the resevered space.
*/
if (unlikely(info->add_timestamp)) {
- event = rb_add_time_stamp(event, delta);
+ bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+ event = rb_add_time_stamp(event, info->delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
@@ -2458,7 +2489,7 @@ static __always_inline void rb_end_commi
static inline void rb_event_discard(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */
@@ -2502,10 +2533,11 @@ rb_update_write_stamp(struct ring_buffer
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->write_stamp += delta;
+ } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->write_stamp = delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
@@ -2685,7 +2717,7 @@ static struct ring_buffer_event *
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
- if (!tail)
+ if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
info->delta = 0;
/* See if we shot pass the end of this buffer page */
@@ -2762,8 +2794,11 @@ rb_reserve_next_event(struct ring_buffer
/* make sure this diff is calculated here */
barrier();
- /* Did the write stamp get updated already? */
- if (likely(info.ts >= cpu_buffer->write_stamp)) {
+ if (ring_buffer_time_stamp_abs(buffer)) {
+ info.delta = info.ts;
+ rb_handle_timestamp(cpu_buffer, &info);
+ } else /* Did the write stamp get updated already? */
+ if (likely(info.ts >= cpu_buffer->write_stamp)) {
info.delta = diff;
if (unlikely(test_time_stamp(info.delta)))
rb_handle_timestamp(cpu_buffer, &info);
@@ -3445,14 +3480,13 @@ rb_update_read_stamp(struct ring_buffer_
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
@@ -3476,14 +3510,13 @@ rb_update_iter_read_stamp(struct ring_bu
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
iter->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ iter->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
@@ -3707,6 +3740,8 @@ rb_buffer_peek(struct ring_buffer_per_cp
struct buffer_page *reader;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
again:
/*
* We repeat when a time extend is encountered.
@@ -3743,12 +3778,17 @@ rb_buffer_peek(struct ring_buffer_per_cp
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_reader(cpu_buffer);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = cpu_buffer->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
@@ -3773,6 +3813,9 @@ rb_iter_peek(struct ring_buffer_iter *it
struct ring_buffer_event *event;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
+
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
@@ -3825,12 +3868,17 @@ rb_iter_peek(struct ring_buffer_iter *it
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_iter(iter);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = iter->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(buffer,
cpu_buffer->cpu, ts);


@ -1,154 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:48 -0600
Subject: [PATCH 14/37] tracing: Add usecs modifier for hist trigger timestamps
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Appending .usecs onto a common_timestamp field will cause the
timestamp value to be in microseconds instead of the default
nanoseconds. A typical latency histogram using usecs would look like
this:
# echo 'hist:keys=pid,prio:ts0=common_timestamp.usecs ...
# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 ...
This also adds an external trace_clock_in_ns() to trace.c for the
timestamp conversion.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 1 +
kernel/trace/trace.c | 13 +++++++++++--
kernel/trace/trace.h | 2 ++
kernel/trace/trace_events_hist.c | 28 ++++++++++++++++++++++------
4 files changed, 36 insertions(+), 8 deletions(-)
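The conversion itself is a one-line guard plus ns2usecs(); this condenses the
hist_field_timestamp() change from the diff below, with only the example_ name
invented. It would live in kernel/trace/trace_events_hist.c.

/*
 * Only convert to microseconds when the selected trace clock actually
 * counts in nanoseconds, otherwise hand back the raw ring buffer
 * timestamp unchanged.
 */
static u64 example_hist_timestamp(struct hist_trigger_data *hist_data,
				  struct ring_buffer_event *rbe)
{
	struct trace_array *tr = hist_data->event_file->tr;
	u64 ts = ring_buffer_event_time_stamp(rbe);

	if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr))
		ts = ns2usecs(ts);	/* 1000 ns per microsecond */

	return ts;
}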
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -74,6 +74,7 @@
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
.log2 display log2 value rather than raw number
+ .usecs display a common_timestamp in microseconds
Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1170,6 +1170,14 @@ static struct {
ARCH_TRACE_CLOCKS
};
+bool trace_clock_in_ns(struct trace_array *tr)
+{
+ if (trace_clocks[tr->clock_id].in_ns)
+ return true;
+
+ return false;
+}
+
/*
* trace_parser_get_init - gets the buffer for trace parser
*/
@@ -4699,8 +4707,9 @@ static const char readme_msg[] =
"\t .sym display an address as a symbol\n"
"\t .sym-offset display an address as a symbol and offset\n"
"\t .execname display a common_pid as a program name\n"
- "\t .syscall display a syscall id as a syscall name\n\n"
- "\t .log2 display log2 value rather than raw number\n\n"
+ "\t .syscall display a syscall id as a syscall name\n"
+ "\t .log2 display log2 value rather than raw number\n"
+ "\t .usecs display a common_timestamp in microseconds\n\n"
"\t The 'pause' parameter can be used to pause an existing hist\n"
"\t trigger or to start a hist trigger but not log any events\n"
"\t until told to do so. 'continue' can be used to start or\n"
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -289,6 +289,8 @@ extern void trace_array_put(struct trace
extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+extern bool trace_clock_in_ns(struct trace_array *tr);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -90,12 +90,6 @@ static u64 hist_field_log2(struct hist_f
return (u64) ilog2(roundup_pow_of_two(val));
}
-static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
-{
- return ring_buffer_event_time_stamp(rbe);
-}
-
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
void *event, \
@@ -143,6 +137,7 @@ enum hist_field_flags {
HIST_FIELD_FL_STACKTRACE = 1 << 8,
HIST_FIELD_FL_LOG2 = 1 << 9,
HIST_FIELD_FL_TIMESTAMP = 1 << 10,
+ HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
};
struct hist_trigger_attrs {
@@ -153,6 +148,7 @@ struct hist_trigger_attrs {
bool pause;
bool cont;
bool clear;
+ bool ts_in_usecs;
unsigned int map_bits;
};
@@ -170,6 +166,20 @@ struct hist_trigger_data {
bool enable_timestamps;
};
+static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ struct hist_trigger_data *hist_data = hist_field->hist_data;
+ struct trace_array *tr = hist_data->event_file->tr;
+
+ u64 ts = ring_buffer_event_time_stamp(rbe);
+
+ if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr))
+ ts = ns2usecs(ts);
+
+ return ts;
+}
+
static const char *hist_field_name(struct hist_field *field,
unsigned int level)
{
@@ -634,6 +644,8 @@ static int create_key_field(struct hist_
flags |= HIST_FIELD_FL_SYSCALL;
else if (strcmp(field_str, "log2") == 0)
flags |= HIST_FIELD_FL_LOG2;
+ else if (strcmp(field_str, "usecs") == 0)
+ flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
ret = -EINVAL;
goto out;
@@ -643,6 +655,8 @@ static int create_key_field(struct hist_
if (strcmp(field_name, "common_timestamp") == 0) {
flags |= HIST_FIELD_FL_TIMESTAMP;
hist_data->enable_timestamps = true;
+ if (flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ hist_data->attrs->ts_in_usecs = true;
key_size = sizeof(u64);
} else {
field = trace_find_event_field(file->event_call, field_name);
@@ -1243,6 +1257,8 @@ static const char *get_hist_field_flags(
flags_str = "syscall";
else if (hist_field->flags & HIST_FIELD_FL_LOG2)
flags_str = "log2";
+ else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ flags_str = "usecs";
return flags_str;
}


@ -1,131 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:04 +0100
Subject: [PATCH 15/29] hrtimer: Make the remote enqueue check unconditional
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
hrtimer_cpu_base.expires_next is used to cache the next event armed in the
timer hardware. The value is used to check whether an hrtimer can be
enqueued remotely. If the new hrtimer is expiring before expires_next, then
remote enqueue is not possible as the remote hrtimer hardware cannot be
accessed for reprogramming to an earlier expiry time.
The remote enqueue check is currently conditional on
CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active. There is no
compelling reason to make this conditional.
Move hrtimer_cpu_base.expires_next out of the CONFIG_HIGH_RES_TIMERS=y
guarded area and remove the conditionals in hrtimer_check_target().
The check is currently a NOOP for the CONFIG_HIGH_RES_TIMERS=n and the
!hrtimer_cpu_base.hres_active case because in these cases nothing updates
hrtimer_cpu_base.expires_next yet. This will be changed with later patches
which further reduce the #ifdef zoo in this code.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 6 +++---
kernel/time/hrtimer.c | 26 ++++++--------------------
2 files changed, 9 insertions(+), 23 deletions(-)
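The check that becomes unconditional is a single comparison against the cached
expiry; shown here out of context in the form hrtimer_check_target() takes
after this patch, with only the example_ name invented.

/*
 * Returns nonzero when the timer would expire before (or at) the event
 * already programmed on the target CPU, in which case the timer must
 * not be enqueued there, because the remote hardware cannot be
 * reprogrammed to an earlier expiry from this CPU.
 */
#include <linux/hrtimer.h>

static int example_check_target(struct hrtimer *timer,
				struct hrtimer_clock_base *new_base)
{
	ktime_t expires;

	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
	return expires <= new_base->cpu_base->expires_next;
}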
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -164,13 +164,13 @@ enum hrtimer_base_type {
* @hres_active: State of high resolution mode
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hang_detected: The last hrtimer interrupt detected a hang
- * @expires_next: absolute time of the next event, is required for remote
- * hrtimer enqueue
* @next_timer: Pointer to the first expiring timer
* @nr_events: Total number of hrtimer interrupt events
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
+ * @expires_next: absolute time of the next event, is required for remote
+ * hrtimer enqueue
* @clock_base: array of clock bases for this cpu
*
* Note: next_timer is just an optimization for __remove_hrtimer().
@@ -186,13 +186,13 @@ struct hrtimer_cpu_base {
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int in_hrtirq : 1,
hang_detected : 1;
- ktime_t expires_next;
struct hrtimer *next_timer;
unsigned int nr_events;
unsigned short nr_retries;
unsigned short nr_hangs;
unsigned int max_hang_time;
#endif
+ ktime_t expires_next;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
} ____cacheline_aligned;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -154,26 +154,21 @@ struct hrtimer_clock_base *lock_hrtimer_
}
/*
- * With HIGHRES=y we do not migrate the timer when it is expiring
- * before the next event on the target cpu because we cannot reprogram
- * the target cpu hardware and we would cause it to fire late.
+ * We do not migrate the timer when it is expiring before the next
+ * event on the target cpu. When high resolution is enabled, we cannot
+ * reprogram the target cpu hardware and we would cause it to fire
+ * late. To keep it simple, we handle the high resolution enabled and
+ * disabled case similar.
*
* Called with cpu_base->lock of target cpu held.
*/
static int
hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
{
-#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires;
- if (!new_base->cpu_base->hres_active)
- return 0;
-
expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
return expires <= new_base->cpu_base->expires_next;
-#else
- return 0;
-#endif
}
static inline
@@ -657,16 +652,6 @@ static void hrtimer_reprogram(struct hrt
}
/*
- * Initialize the high resolution related parts of cpu_base
- */
-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
-{
- base->expires_next = KTIME_MAX;
- base->hang_detected = 0;
- base->next_timer = NULL;
-}
-
-/*
* Retrigger next event is called after clock was set
*
* Called with interrupts disabled via on_each_cpu()
@@ -731,7 +716,6 @@ static inline int hrtimer_reprogram(stru
{
return 0;
}
-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void retrigger_next_event(void *arg) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -1604,7 +1588,7 @@ int hrtimers_prepare_cpu(unsigned int cp
cpu_base->active_bases = 0;
cpu_base->cpu = cpu;
cpu_base->hres_active = 0;
- hrtimer_init_hres(cpu_base);
+ cpu_base->expires_next = KTIME_MAX;
return 0;
}


@ -1,212 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 Mar 2018 23:15:52 -0500
Subject: [PATCH 15/17] make non-exchanging __d_move() copy ->d_parent rather
than swap them
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 076515fc926793e162fc6525bed1679ef2bbf269
Currently d_move(from, to) does the following:
* name/parent of from <- old name/parent of to, from hashed there
* to is unhashed
* name of to is preserved
* if from used to be detached, to gets detached
* if from used to be attached, parent of to <- old parent of from.
That's both user-visibly bogus and complicates reasoning a lot.
Much saner semantics would be
* name/parent of from <- name/parent of to, from hashed there.
* to is unhashed
* name/parent of to is unchanged.
The price, of course, is that old parent of from might lose a reference.
However,
* all potentially cross-directory callers of d_move() have both
parents pinned directly; typically, dentries themselves are grabbed
only after we have grabbed and locked both parents. IOW, the decrement
of old parent's refcount in case of d_move() won't reach zero.
* __d_move() from d_splice_alias() is done to detached alias.
No refcount decrements in that case
* __d_move() from __d_unalias() *can* get the refcount to zero.
So let's grab a reference to alias' old parent before calling __d_unalias()
and dput() it after we'd dropped rename_lock.
That does make d_splice_alias() potentially blocking. However, it has
no callers in non-sleepable contexts (and the case where we'd grown
that dget/dput pair is _very_ rare, so performance is not an issue).
Another thing that needs adjustment is unlocking in the end of __d_move();
folded it in. And cleaned the remnants of bogus ordering from the
"lock them in the beginning" counterpart - it's never been right and
now (well, for 7 years now) we have that thing always serialized on
rename_lock anyway.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 93 +++++++++++++++++++-----------------------------------------
1 file changed, 30 insertions(+), 63 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,9 +67,7 @@
* dentry->d_lock
*
* If no ancestor relationship:
- * if (dentry1 < dentry2)
- * dentry1->d_lock
- * dentry2->d_lock
+ * arbitrary, since it's serialized on rename_lock
*/
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -2777,9 +2775,6 @@ static void copy_name(struct dentry *den
static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
- /*
- * XXXX: do we really need to take target->d_lock?
- */
if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
spin_lock(&target->d_parent->d_lock);
else {
@@ -2793,40 +2788,11 @@ static void dentry_lock_for_move(struct
DENTRY_D_LOCK_NESTED);
}
}
- if (target < dentry) {
- spin_lock_nested(&target->d_lock, 2);
- spin_lock_nested(&dentry->d_lock, 3);
- } else {
- spin_lock_nested(&dentry->d_lock, 2);
- spin_lock_nested(&target->d_lock, 3);
- }
-}
-
-static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
-{
- if (target->d_parent != dentry->d_parent)
- spin_unlock(&dentry->d_parent->d_lock);
- if (target->d_parent != target)
- spin_unlock(&target->d_parent->d_lock);
- spin_unlock(&target->d_lock);
- spin_unlock(&dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
}
/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch, unless we are going to rehash
- * it. Note that if we *do* unhash the target, we are not allowed
- * to rehash it without giving it a new name/hash key - whether
- * we swap or overwrite the names here, resulting name won't match
- * the reality in filesystem; it's only there for d_path() purposes.
- * Note that all of this is happening under rename_lock, so the
- * any hash lookup seeing it in the middle of manipulations will
- * be discarded anyway. So we do not care what happens to the hash
- * key in that case.
- */
-/*
* __d_move - move a dentry
* @dentry: entry to move
* @target: new dentry
@@ -2840,6 +2806,7 @@ static void dentry_unlock_for_move(struc
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
+ struct dentry *old_parent;
struct inode *dir = NULL;
unsigned n;
if (!dentry->d_inode)
@@ -2858,49 +2825,47 @@ static void __d_move(struct dentry *dent
write_seqcount_begin(&dentry->d_seq);
write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
+ old_parent = dentry->d_parent;
+
/* unhash both */
if (!d_unhashed(dentry))
___d_drop(dentry);
if (!d_unhashed(target))
___d_drop(target);
- /* Switch the names.. */
- if (exchange)
- swap_names(dentry, target);
- else
+ /* ... and switch them in the tree */
+ dentry->d_parent = target->d_parent;
+ if (!exchange) {
copy_name(dentry, target);
-
- /* rehash in new place(s) */
- __d_rehash(dentry);
- if (exchange)
- __d_rehash(target);
- else
target->d_hash.pprev = NULL;
-
- /* ... and switch them in the tree */
- if (IS_ROOT(dentry)) {
- /* splicing a tree */
- dentry->d_flags |= DCACHE_RCUACCESS;
- dentry->d_parent = target->d_parent;
- target->d_parent = target;
- list_del_init(&target->d_child);
- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ dentry->d_parent->d_lockref.count++;
+ if (dentry == old_parent)
+ dentry->d_flags |= DCACHE_RCUACCESS;
+ else
+ WARN_ON(!--old_parent->d_lockref.count);
} else {
- /* swapping two dentries */
- swap(dentry->d_parent, target->d_parent);
+ target->d_parent = old_parent;
+ swap_names(dentry, target);
list_move(&target->d_child, &target->d_parent->d_subdirs);
- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
- if (exchange)
- fsnotify_update_flags(target);
- fsnotify_update_flags(dentry);
+ __d_rehash(target);
+ fsnotify_update_flags(target);
}
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ __d_rehash(dentry);
+ fsnotify_update_flags(dentry);
write_seqcount_end(&target->d_seq);
write_seqcount_end(&dentry->d_seq);
if (dir)
end_dir_add(dir, n);
- dentry_unlock_for_move(dentry, target);
+
+ if (dentry->d_parent != old_parent)
+ spin_unlock(&dentry->d_parent->d_lock);
+ if (dentry != old_parent)
+ spin_unlock(&old_parent->d_lock);
+ spin_unlock(&target->d_lock);
+ spin_unlock(&dentry->d_lock);
}
/*
@@ -3048,12 +3013,14 @@ struct dentry *d_splice_alias(struct ino
inode->i_sb->s_type->name,
inode->i_sb->s_id);
} else if (!IS_ROOT(new)) {
+ struct dentry *old_parent = dget(new->d_parent);
int err = __d_unalias(inode, dentry, new);
write_sequnlock(&rename_lock);
if (err) {
dput(new);
new = ERR_PTR(err);
}
+ dput(old_parent);
} else {
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);


@ -1,138 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:41 -0600
Subject: [PATCH 15/48] tracing: Add timestamp_mode trace file
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Add a new option flag indicating whether or not the ring buffer is in
'absolute timestamp' mode.
Currently this is only set/unset by hist triggers that make use of a
common_timestamp. As such, there's no reason to make this writeable
for users - its purpose is only to allow users to determine
unequivocally whether or not the ring buffer is in that mode (although
absolute timestamps can coexist with the normal delta timestamps,
timestamps written while absolute mode is in effect take up more space
in the buffer and are not as efficient).
Link: http://lkml.kernel.org/r/e8aa7b1cde1cf15014e66545d06ac6ef2ebba456.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 0eba34f9bf5b66217355a6a66054b3194aca123d)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/ftrace.txt | 24 ++++++++++++++++++++
kernel/trace/trace.c | 47 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+)
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -539,6 +539,30 @@ After mounting tracefs you will have acc
See events.txt for more information.
+ timestamp_mode:
+
+ Certain tracers may change the timestamp mode used when
+ logging trace events into the event buffer. Events with
+ different modes can coexist within a buffer but the mode in
+ effect when an event is logged determines which timestamp mode
+ is used for that event. The default timestamp mode is
+ 'delta'.
+
+ Usual timestamp modes for tracing:
+
+ # cat timestamp_mode
+ [delta] absolute
+
+ The timestamp mode with the square brackets around it is the
+ one in effect.
+
+ delta: Default timestamp mode - timestamp is a delta against
+ a per-buffer timestamp.
+
+ absolute: The timestamp is a full timestamp, not a delta
+ against some other value. As such it takes up more
+ space and is less efficient.
+
hwlat_detector:
Directory for the Hardware Latency Detector.
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4515,6 +4515,9 @@ static const char readme_msg[] =
#ifdef CONFIG_X86_64
" x86-tsc: TSC cycle counter\n"
#endif
+ "\n timestamp_mode\t-view the mode used to timestamp events\n"
+ " delta: Delta difference against a buffer-wide timestamp\n"
+ " absolute: Absolute (standalone) timestamp\n"
"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
"\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
" tracing_cpumask\t- Limit which CPUs to trace\n"
@@ -6282,6 +6285,40 @@ static int tracing_clock_open(struct ino
return ret;
}
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
+{
+ struct trace_array *tr = m->private;
+
+ mutex_lock(&trace_types_lock);
+
+ if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
+ seq_puts(m, "delta [absolute]\n");
+ else
+ seq_puts(m, "[delta] absolute\n");
+
+ mutex_unlock(&trace_types_lock);
+
+ return 0;
+}
+
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (trace_array_get(tr))
+ return -ENODEV;
+
+ ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
+}
+
int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
{
int ret = 0;
@@ -6560,6 +6597,13 @@ static const struct file_operations trac
.write = tracing_clock_write,
};
+static const struct file_operations trace_time_stamp_mode_fops = {
+ .open = tracing_time_stamp_mode_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_single_release_tr,
+};
+
#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
.open = tracing_snapshot_open,
@@ -7882,6 +7926,9 @@ init_tracer_tracefs(struct trace_array *
trace_create_file("tracing_on", 0644, d_tracer,
tr, &rb_simple_fops);
+ trace_create_file("timestamp_mode", 0444, d_tracer, tr,
+ &trace_time_stamp_mode_fops);
+
create_trace_options_dir(tr);
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)


@ -1,779 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:49 -0600
Subject: [PATCH 15/37] tracing: Add variable support to hist triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Add support for saving the value of a current event's event field by
assigning it to a variable that can be read by a subsequent event.
The basic syntax for saving a variable is to simply prefix a unique
variable name not corresponding to any keyword along with an '=' sign
to any event field.
Both keys and values can be saved and retrieved in this way:
# echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ...
# echo 'hist:timer_pid=common_pid:key=$timer_pid ...'
If a variable isn't a key variable or prefixed with 'vals=', the
associated event field will be saved in a variable but won't be summed
as a value:
# echo 'hist:keys=next_pid:ts1=common_timestamp:...
Multiple variables can be assigned at the same time:
# echo 'hist:keys=pid:vals=$ts0,$b,field2:ts0=common_timestamp,b=field1 ...
Multiple (or single) variables can also be assigned at the same time
using separate assignments:
# echo 'hist:keys=pid:vals=$ts0:ts0=common_timestamp:b=field1:c=field2 ...
Variables set as above can be used by being referenced from another
event, as described in a subsequent patch.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 370 ++++++++++++++++++++++++++++++++++-----
1 file changed, 331 insertions(+), 39 deletions(-)
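A sketch of where an assignment such as ts0=common_timestamp ends up at event
time: the resolved field value is written into one of the per-element variable
slots added to tracing_map earlier in this series. The example_save_var()
helper is illustrative; the field, flag and var.idx names follow the diff
below, and the code would live in kernel/trace/trace_events_hist.c.

/*
 * Illustrative only: a variable field is evaluated like any other
 * field, but its value is stored in the element's variable slot
 * (var.idx) instead of being summed, so a later event can read it.
 */
static void example_save_var(struct tracing_map_elt *elt,
			     struct hist_field *hist_field,
			     void *rec, struct ring_buffer_event *rbe)
{
	u64 val;

	if (!(hist_field->flags & HIST_FIELD_FL_VAR))
		return;

	val = hist_field->fn(hist_field, rec, rbe);	/* e.g. common_timestamp */
	tracing_map_set_var(elt, hist_field->var.idx, val);
}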
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -30,6 +30,13 @@ typedef u64 (*hist_field_fn_t) (struct h
struct ring_buffer_event *rbe);
#define HIST_FIELD_OPERANDS_MAX 2
+#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+
+struct hist_var {
+ char *name;
+ struct hist_trigger_data *hist_data;
+ unsigned int idx;
+};
struct hist_field {
struct ftrace_event_field *field;
@@ -40,6 +47,7 @@ struct hist_field {
unsigned int is_signed;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
+ struct hist_var var;
};
static u64 hist_field_none(struct hist_field *field, void *event,
@@ -138,6 +146,13 @@ enum hist_field_flags {
HIST_FIELD_FL_LOG2 = 1 << 9,
HIST_FIELD_FL_TIMESTAMP = 1 << 10,
HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
+ HIST_FIELD_FL_VAR = 1 << 12,
+};
+
+struct var_defs {
+ unsigned int n_vars;
+ char *name[TRACING_MAP_VARS_MAX];
+ char *expr[TRACING_MAP_VARS_MAX];
};
struct hist_trigger_attrs {
@@ -150,13 +165,19 @@ struct hist_trigger_attrs {
bool clear;
bool ts_in_usecs;
unsigned int map_bits;
+
+ char *assignment_str[TRACING_MAP_VARS_MAX];
+ unsigned int n_assignments;
+
+ struct var_defs var_defs;
};
struct hist_trigger_data {
- struct hist_field *fields[TRACING_MAP_FIELDS_MAX];
+ struct hist_field *fields[HIST_FIELDS_MAX];
unsigned int n_vals;
unsigned int n_keys;
unsigned int n_fields;
+ unsigned int n_vars;
unsigned int key_size;
struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX];
unsigned int n_sort_keys;
@@ -164,6 +185,7 @@ struct hist_trigger_data {
struct hist_trigger_attrs *attrs;
struct tracing_map *map;
bool enable_timestamps;
+ bool remove;
};
static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
@@ -180,6 +202,48 @@ static u64 hist_field_timestamp(struct h
return ts;
}
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+ const char *var_name)
+{
+ struct hist_field *hist_field, *found = NULL;
+ int i;
+
+ for_each_hist_field(i, hist_data) {
+ hist_field = hist_data->fields[i];
+ if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR &&
+ strcmp(hist_field->var.name, var_name) == 0) {
+ found = hist_field;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct hist_field *find_var(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ const char *var_name)
+{
+ struct hist_trigger_data *test_data;
+ struct event_trigger_data *test;
+ struct hist_field *hist_field;
+
+ hist_field = find_var_field(hist_data, var_name);
+ if (hist_field)
+ return hist_field;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ test_data = test->private_data;
+ hist_field = find_var_field(test_data, var_name);
+ if (hist_field)
+ return hist_field;
+ }
+ }
+
+ return NULL;
+}
+
static const char *hist_field_name(struct hist_field *field,
unsigned int level)
{
@@ -262,9 +326,14 @@ static int parse_map_size(char *str)
static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
{
+ unsigned int i;
+
if (!attrs)
return;
+ for (i = 0; i < attrs->n_assignments; i++)
+ kfree(attrs->assignment_str[i]);
+
kfree(attrs->name);
kfree(attrs->sort_key_str);
kfree(attrs->keys_str);
@@ -311,8 +380,22 @@ static int parse_assignment(char *str, s
goto out;
}
attrs->map_bits = map_bits;
- } else
- ret = -EINVAL;
+ } else {
+ char *assignment;
+
+ if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ assignment = kstrdup(str, GFP_KERNEL);
+ if (!assignment) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ attrs->assignment_str[attrs->n_assignments++] = assignment;
+ }
out:
return ret;
}
@@ -428,12 +511,15 @@ static void destroy_hist_field(struct hi
for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
destroy_hist_field(hist_field->operands[i], level + 1);
+ kfree(hist_field->var.name);
+
kfree(hist_field);
}
static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
struct ftrace_event_field *field,
- unsigned long flags)
+ unsigned long flags,
+ char *var_name)
{
struct hist_field *hist_field;
@@ -459,7 +545,7 @@ static struct hist_field *create_hist_fi
if (flags & HIST_FIELD_FL_LOG2) {
unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
hist_field->fn = hist_field_log2;
- hist_field->operands[0] = create_hist_field(hist_data, field, fl);
+ hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
hist_field->size = hist_field->operands[0]->size;
goto out;
}
@@ -494,14 +580,23 @@ static struct hist_field *create_hist_fi
hist_field->field = field;
hist_field->flags = flags;
+ if (var_name) {
+ hist_field->var.name = kstrdup(var_name, GFP_KERNEL);
+ if (!hist_field->var.name)
+ goto free;
+ }
+
return hist_field;
+ free:
+ destroy_hist_field(hist_field, 0);
+ return NULL;
}
static void destroy_hist_fields(struct hist_trigger_data *hist_data)
{
unsigned int i;
- for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) {
+ for (i = 0; i < HIST_FIELDS_MAX; i++) {
if (hist_data->fields[i]) {
destroy_hist_field(hist_data->fields[i], 0);
hist_data->fields[i] = NULL;
@@ -512,11 +607,12 @@ static void destroy_hist_fields(struct h
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
- create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT);
+ create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL);
if (!hist_data->fields[HITCOUNT_IDX])
return -ENOMEM;
hist_data->n_vals++;
+ hist_data->n_fields++;
if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
return -EINVAL;
@@ -524,19 +620,16 @@ static int create_hitcount_val(struct hi
return 0;
}
-static int create_val_field(struct hist_trigger_data *hist_data,
- unsigned int val_idx,
- struct trace_event_file *file,
- char *field_str)
+static int __create_val_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *var_name, char *field_str,
+ unsigned long flags)
{
struct ftrace_event_field *field = NULL;
- unsigned long flags = 0;
char *field_name;
int ret = 0;
- if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
- return -EINVAL;
-
field_name = strsep(&field_str, ".");
if (field_str) {
if (strcmp(field_str, "hex") == 0)
@@ -558,25 +651,58 @@ static int create_val_field(struct hist_
}
}
- hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags);
+ hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name);
if (!hist_data->fields[val_idx]) {
ret = -ENOMEM;
goto out;
}
++hist_data->n_vals;
+ ++hist_data->n_fields;
- if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+ if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
ret = -EINVAL;
out:
return ret;
}
+static int create_val_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *field_str)
+{
+ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
+ return -EINVAL;
+
+ return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
+}
+
+static int create_var_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *var_name, char *expr_str)
+{
+ unsigned long flags = 0;
+
+ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
+ return -EINVAL;
+ if (find_var(hist_data, file, var_name) && !hist_data->remove) {
+ return -EINVAL;
+ }
+
+ flags |= HIST_FIELD_FL_VAR;
+ hist_data->n_vars++;
+ if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
+ return -EINVAL;
+
+ return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+}
+
static int create_val_fields(struct hist_trigger_data *hist_data,
struct trace_event_file *file)
{
char *fields_str, *field_str;
- unsigned int i, j;
+ unsigned int i, j = 1;
int ret;
ret = create_hitcount_val(hist_data);
@@ -596,12 +722,15 @@ static int create_val_fields(struct hist
field_str = strsep(&fields_str, ",");
if (!field_str)
break;
+
if (strcmp(field_str, "hitcount") == 0)
continue;
+
ret = create_val_field(hist_data, j++, file, field_str);
if (ret)
goto out;
}
+
if (fields_str && (strcmp(fields_str, "hitcount") != 0))
ret = -EINVAL;
out:
@@ -615,11 +744,12 @@ static int create_key_field(struct hist_
char *field_str)
{
struct ftrace_event_field *field = NULL;
+ struct hist_field *hist_field = NULL;
unsigned long flags = 0;
unsigned int key_size;
int ret = 0;
- if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX))
+ if (WARN_ON(key_idx >= HIST_FIELDS_MAX))
return -EINVAL;
flags |= HIST_FIELD_FL_KEY;
@@ -627,6 +757,7 @@ static int create_key_field(struct hist_
if (strcmp(field_str, "stacktrace") == 0) {
flags |= HIST_FIELD_FL_STACKTRACE;
key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
+ hist_field = create_hist_field(hist_data, NULL, flags, NULL);
} else {
char *field_name = strsep(&field_str, ".");
@@ -672,7 +803,7 @@ static int create_key_field(struct hist_
}
}
- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags);
+ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL);
if (!hist_data->fields[key_idx]) {
ret = -ENOMEM;
goto out;
@@ -688,6 +819,7 @@ static int create_key_field(struct hist_
}
hist_data->n_keys++;
+ hist_data->n_fields++;
if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX))
return -EINVAL;
@@ -731,21 +863,111 @@ static int create_key_fields(struct hist
return ret;
}
+static int create_var_fields(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file)
+{
+ unsigned int i, j = hist_data->n_vals;
+ int ret = 0;
+
+ unsigned int n_vars = hist_data->attrs->var_defs.n_vars;
+
+ for (i = 0; i < n_vars; i++) {
+ char *var_name = hist_data->attrs->var_defs.name[i];
+ char *expr = hist_data->attrs->var_defs.expr[i];
+
+ ret = create_var_field(hist_data, j++, file, var_name, expr);
+ if (ret)
+ goto out;
+ }
+ out:
+ return ret;
+}
+
+static void free_var_defs(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+ kfree(hist_data->attrs->var_defs.name[i]);
+ kfree(hist_data->attrs->var_defs.expr[i]);
+ }
+
+ hist_data->attrs->var_defs.n_vars = 0;
+}
+
+static int parse_var_defs(struct hist_trigger_data *hist_data)
+{
+ char *s, *str, *var_name, *field_str;
+ unsigned int i, j, n_vars = 0;
+ int ret = 0;
+
+ for (i = 0; i < hist_data->attrs->n_assignments; i++) {
+ str = hist_data->attrs->assignment_str[i];
+ for (j = 0; j < TRACING_MAP_VARS_MAX; j++) {
+ field_str = strsep(&str, ",");
+ if (!field_str)
+ break;
+
+ var_name = strsep(&field_str, "=");
+ if (!var_name || !field_str) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ if (n_vars == TRACING_MAP_VARS_MAX) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ s = kstrdup(var_name, GFP_KERNEL);
+ if (!s) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ hist_data->attrs->var_defs.name[n_vars] = s;
+
+ s = kstrdup(field_str, GFP_KERNEL);
+ if (!s) {
+ kfree(hist_data->attrs->var_defs.name[n_vars]);
+ ret = -ENOMEM;
+ goto free;
+ }
+ hist_data->attrs->var_defs.expr[n_vars++] = s;
+
+ hist_data->attrs->var_defs.n_vars = n_vars;
+ }
+ }
+
+ return ret;
+ free:
+ free_var_defs(hist_data);
+
+ return ret;
+}
+
static int create_hist_fields(struct hist_trigger_data *hist_data,
struct trace_event_file *file)
{
int ret;
+ ret = parse_var_defs(hist_data);
+ if (ret)
+ goto out;
+
ret = create_val_fields(hist_data, file);
if (ret)
goto out;
- ret = create_key_fields(hist_data, file);
+ ret = create_var_fields(hist_data, file);
if (ret)
goto out;
- hist_data->n_fields = hist_data->n_vals + hist_data->n_keys;
+ ret = create_key_fields(hist_data, file);
+ if (ret)
+ goto out;
out:
+ free_var_defs(hist_data);
+
return ret;
}
@@ -768,7 +990,7 @@ static int create_sort_keys(struct hist_
char *fields_str = hist_data->attrs->sort_key_str;
struct tracing_map_sort_key *sort_key;
int descending, ret = 0;
- unsigned int i, j;
+ unsigned int i, j, k;
hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */
@@ -816,12 +1038,19 @@ static int create_sort_keys(struct hist_
continue;
}
- for (j = 1; j < hist_data->n_fields; j++) {
+ for (j = 1, k = 1; j < hist_data->n_fields; j++) {
+ unsigned int idx;
+
hist_field = hist_data->fields[j];
+ if (hist_field->flags & HIST_FIELD_FL_VAR)
+ continue;
+
+ idx = k++;
+
test_name = hist_field_name(hist_field, 0);
if (strcmp(field_name, test_name) == 0) {
- sort_key->field_idx = j;
+ sort_key->field_idx = idx;
descending = is_descending(field_str);
if (descending < 0) {
ret = descending;
@@ -836,6 +1065,7 @@ static int create_sort_keys(struct hist_
break;
}
}
+
hist_data->n_sort_keys = i;
out:
return ret;
@@ -876,12 +1106,19 @@ static int create_tracing_map_fields(str
idx = tracing_map_add_key_field(map,
hist_field->offset,
cmp_fn);
-
- } else
+ } else if (!(hist_field->flags & HIST_FIELD_FL_VAR))
idx = tracing_map_add_sum_field(map);
if (idx < 0)
return idx;
+
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ idx = tracing_map_add_var(map);
+ if (idx < 0)
+ return idx;
+ hist_field->var.idx = idx;
+ hist_field->var.hist_data = hist_data;
+ }
}
return 0;
@@ -905,7 +1142,8 @@ static bool need_tracing_map_ops(struct
static struct hist_trigger_data *
create_hist_data(unsigned int map_bits,
struct hist_trigger_attrs *attrs,
- struct trace_event_file *file)
+ struct trace_event_file *file,
+ bool remove)
{
const struct tracing_map_ops *map_ops = NULL;
struct hist_trigger_data *hist_data;
@@ -916,6 +1154,7 @@ create_hist_data(unsigned int map_bits,
return ERR_PTR(-ENOMEM);
hist_data->attrs = attrs;
+ hist_data->remove = remove;
ret = create_hist_fields(hist_data, file);
if (ret)
@@ -962,14 +1201,28 @@ static void hist_trigger_elt_update(stru
struct ring_buffer_event *rbe)
{
struct hist_field *hist_field;
- unsigned int i;
+ unsigned int i, var_idx;
u64 hist_val;
for_each_hist_val_field(i, hist_data) {
hist_field = hist_data->fields[i];
hist_val = hist_field->fn(hist_field, rec, rbe);
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ var_idx = hist_field->var.idx;
+ tracing_map_set_var(elt, var_idx, hist_val);
+ continue;
+ }
tracing_map_update_sum(elt, i, hist_val);
}
+
+ for_each_hist_key_field(i, hist_data) {
+ hist_field = hist_data->fields[i];
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ hist_val = hist_field->fn(hist_field, rec, rbe);
+ var_idx = hist_field->var.idx;
+ tracing_map_set_var(elt, var_idx, hist_val);
+ }
+ }
}
static inline void add_to_key(char *compound_key, void *key,
@@ -1144,6 +1397,9 @@ hist_trigger_entry_print(struct seq_file
for (i = 1; i < hist_data->n_vals; i++) {
field_name = hist_field_name(hist_data->fields[i], 0);
+ if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR)
+ continue;
+
if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
seq_printf(m, " %s: %10llx", field_name,
tracing_map_read_sum(elt, i));
@@ -1267,6 +1523,9 @@ static void hist_field_print(struct seq_
{
const char *field_name = hist_field_name(hist_field, 0);
+ if (hist_field->var.name)
+ seq_printf(m, "%s=", hist_field->var.name);
+
if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
else if (field_name)
@@ -1285,7 +1544,8 @@ static int event_hist_trigger_print(stru
struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
- struct hist_field *key_field;
+ struct hist_field *field;
+ bool have_var = false;
unsigned int i;
seq_puts(m, "hist:");
@@ -1296,25 +1556,47 @@ static int event_hist_trigger_print(stru
seq_puts(m, "keys=");
for_each_hist_key_field(i, hist_data) {
- key_field = hist_data->fields[i];
+ field = hist_data->fields[i];
if (i > hist_data->n_vals)
seq_puts(m, ",");
- if (key_field->flags & HIST_FIELD_FL_STACKTRACE)
+ if (field->flags & HIST_FIELD_FL_STACKTRACE)
seq_puts(m, "stacktrace");
else
- hist_field_print(m, key_field);
+ hist_field_print(m, field);
}
seq_puts(m, ":vals=");
for_each_hist_val_field(i, hist_data) {
+ field = hist_data->fields[i];
+ if (field->flags & HIST_FIELD_FL_VAR) {
+ have_var = true;
+ continue;
+ }
+
if (i == HITCOUNT_IDX)
seq_puts(m, "hitcount");
else {
seq_puts(m, ",");
- hist_field_print(m, hist_data->fields[i]);
+ hist_field_print(m, field);
+ }
+ }
+
+ if (have_var) {
+ unsigned int n = 0;
+
+ seq_puts(m, ":");
+
+ for_each_hist_val_field(i, hist_data) {
+ field = hist_data->fields[i];
+
+ if (field->flags & HIST_FIELD_FL_VAR) {
+ if (n++)
+ seq_puts(m, ",");
+ hist_field_print(m, field);
+ }
}
}
@@ -1322,7 +1604,10 @@ static int event_hist_trigger_print(stru
for (i = 0; i < hist_data->n_sort_keys; i++) {
struct tracing_map_sort_key *sort_key;
- unsigned int idx;
+ unsigned int idx, first_key_idx;
+
+ /* skip VAR vals */
+ first_key_idx = hist_data->n_vals - hist_data->n_vars;
sort_key = &hist_data->sort_keys[i];
idx = sort_key->field_idx;
@@ -1335,8 +1620,11 @@ static int event_hist_trigger_print(stru
if (idx == HITCOUNT_IDX)
seq_puts(m, "hitcount");
- else
+ else {
+ if (idx >= first_key_idx)
+ idx += hist_data->n_vars;
hist_field_print(m, hist_data->fields[idx]);
+ }
if (sort_key->descending)
seq_puts(m, ".descending");
@@ -1633,7 +1921,7 @@ static void hist_unregister_trigger(char
test->ops->free(test->ops, test);
if (hist_data->enable_timestamps) {
- if (unregistered)
+ if (!hist_data->remove || unregistered)
tracing_set_time_stamp_abs(file->tr, false);
}
}
@@ -1666,12 +1954,16 @@ static int event_hist_trigger_func(struc
struct hist_trigger_attrs *attrs;
struct event_trigger_ops *trigger_ops;
struct hist_trigger_data *hist_data;
+ bool remove = false;
char *trigger;
int ret = 0;
if (!param)
return -EINVAL;
+ if (glob[0] == '!')
+ remove = true;
+
/* separate the trigger from the filter (k:v [if filter]) */
trigger = strsep(&param, " \t");
if (!trigger)
@@ -1684,7 +1976,7 @@ static int event_hist_trigger_func(struc
if (attrs->map_bits)
hist_trigger_bits = attrs->map_bits;
- hist_data = create_hist_data(hist_trigger_bits, attrs, file);
+ hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove);
if (IS_ERR(hist_data)) {
destroy_hist_trigger_attrs(attrs);
return PTR_ERR(hist_data);
@@ -1713,7 +2005,7 @@ static int event_hist_trigger_func(struc
goto out_free;
}
- if (glob[0] == '!') {
+ if (remove) {
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
ret = 0;
goto out_free;

View File

@ -1,91 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 11 Mar 2018 15:15:46 -0400
Subject: [PATCH 16/17] fold dentry_lock_for_move() into its sole caller and
clean it up
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit 42177007aa277af3e37bf2ae3efdfe795c81d700
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 49 +++++++++++++++++++++++--------------------------
1 file changed, 23 insertions(+), 26 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2773,25 +2773,6 @@ static void copy_name(struct dentry *den
kfree_rcu(old_name, u.head);
}
-static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
-{
- if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
- spin_lock(&target->d_parent->d_lock);
- else {
- if (d_ancestor(dentry->d_parent, target->d_parent)) {
- spin_lock(&dentry->d_parent->d_lock);
- spin_lock_nested(&target->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- } else {
- spin_lock(&target->d_parent->d_lock);
- spin_lock_nested(&dentry->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- }
- }
- spin_lock_nested(&dentry->d_lock, 2);
- spin_lock_nested(&target->d_lock, 3);
-}
-
/*
* __d_move - move a dentry
* @dentry: entry to move
@@ -2806,16 +2787,34 @@ static void dentry_lock_for_move(struct
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
- struct dentry *old_parent;
+ struct dentry *old_parent, *p;
struct inode *dir = NULL;
unsigned n;
- if (!dentry->d_inode)
- printk(KERN_WARNING "VFS: moving negative dcache entry\n");
- BUG_ON(d_ancestor(dentry, target));
+ WARN_ON(!dentry->d_inode);
+ if (WARN_ON(dentry == target))
+ return;
+
BUG_ON(d_ancestor(target, dentry));
+ old_parent = dentry->d_parent;
+ p = d_ancestor(old_parent, target);
+ if (IS_ROOT(dentry)) {
+ BUG_ON(p);
+ spin_lock(&target->d_parent->d_lock);
+ } else if (!p) {
+ /* target is not a descendent of dentry->d_parent */
+ spin_lock(&target->d_parent->d_lock);
+ spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
+ } else {
+ BUG_ON(p == dentry);
+ spin_lock(&old_parent->d_lock);
+ if (p != target)
+ spin_lock_nested(&target->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ }
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
- dentry_lock_for_move(dentry, target);
if (unlikely(d_in_lookup(target))) {
dir = target->d_parent->d_inode;
n = start_dir_add(dir);
@@ -2825,8 +2824,6 @@ static void __d_move(struct dentry *dent
write_seqcount_begin(&dentry->d_seq);
write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
- old_parent = dentry->d_parent;
-
/* unhash both */
if (!d_unhashed(dentry))
___d_drop(dentry);

View File

@ -1,98 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:05 +0100
Subject: [PATCH 16/29] hrtimer: Make hrtimer_cpu_base.next_timer handling
unconditional
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
hrtimer_cpu_base.next_timer stores the pointer to the next expiring timer
in a cpu base.
This pointer cannot be dereferenced and is solely used to check whether the
hrtimer being removed is the first to expire in the CPU base. If it is, the
timer hardware needs to be reprogrammed to avoid an extra interrupt for
nothing.
Again, this is conditional functionality, but there is no compelling reason
to make this conditional. As a preparation, hrtimer_cpu_base.next_timer
needs to be available unconditionally. Aside from that, the upcoming support for
softirq based hrtimers requires access to this pointer unconditionally.
Make the update of hrtimer_cpu_base.next_timer unconditional and remove the
ifdef cruft. The impact on CONFIG_HIGH_RES_TIMERS=n && CONFIG_NOHZ=n is
marginal as it's just a store on an already dirtied cacheline.
No functional change.
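
As an aside (not part of the patch), the optimization being made unconditional
can be modelled in a few lines of plain C; all names below are invented for the
sketch, and only the comparison against the cached pointer mirrors the kernel
logic:

#include <stdio.h>

struct timer { unsigned long long expires; };

struct cpu_base_model {
	struct timer *next_timer;	/* first expiring timer; only compared, never dereferenced */
};

static void reprogram_hardware(void)
{
	printf("reprogram clock event device\n");
}

static void remove_timer(struct cpu_base_model *base, struct timer *t)
{
	/*
	 * Only the first-expiring timer armed the hardware, so removing any
	 * other timer needs no reprogramming at all.
	 */
	if (base->next_timer == t) {
		base->next_timer = NULL;	/* recomputed on the next scan */
		reprogram_hardware();
	}
}

int main(void)
{
	struct timer a = { .expires = 100 }, b = { .expires = 200 };
	struct cpu_base_model base = { .next_timer = &a };

	remove_timer(&base, &b);	/* not the first to expire: nothing to do */
	remove_timer(&base, &a);	/* first to expire: hardware is reprogrammed */
	return 0;
}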
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 4 ++--
kernel/time/hrtimer.c | 12 ++----------
2 files changed, 4 insertions(+), 12 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -164,13 +164,13 @@ enum hrtimer_base_type {
* @hres_active: State of high resolution mode
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hang_detected: The last hrtimer interrupt detected a hang
- * @next_timer: Pointer to the first expiring timer
* @nr_events: Total number of hrtimer interrupt events
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
* @expires_next: absolute time of the next event, is required for remote
* hrtimer enqueue
+ * @next_timer: Pointer to the first expiring timer
* @clock_base: array of clock bases for this cpu
*
* Note: next_timer is just an optimization for __remove_hrtimer().
@@ -186,13 +186,13 @@ struct hrtimer_cpu_base {
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int in_hrtirq : 1,
hang_detected : 1;
- struct hrtimer *next_timer;
unsigned int nr_events;
unsigned short nr_retries;
unsigned short nr_hangs;
unsigned int max_hang_time;
#endif
ktime_t expires_next;
+ struct hrtimer *next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
} ____cacheline_aligned;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -459,21 +459,13 @@ static struct hrtimer_clock_base *
while ((base = __next_base((cpu_base), &(active))))
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
-static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
- struct hrtimer *timer)
-{
-#ifdef CONFIG_HIGH_RES_TIMERS
- cpu_base->next_timer = timer;
-#endif
-}
-
static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
{
struct hrtimer_clock_base *base;
unsigned int active = cpu_base->active_bases;
ktime_t expires, expires_next = KTIME_MAX;
- hrtimer_update_next_timer(cpu_base, NULL);
+ cpu_base->next_timer = NULL;
for_each_active_base(base, cpu_base, active) {
struct timerqueue_node *next;
struct hrtimer *timer;
@@ -483,7 +475,7 @@ static ktime_t __hrtimer_get_next_event(
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
if (expires < expires_next) {
expires_next = expires;
- hrtimer_update_next_timer(cpu_base, timer);
+ cpu_base->next_timer = timer;
}
}
/*

View File

@ -1,42 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:50 -0600
Subject: [PATCH 16/37] tracing: Account for variables in named trigger
compatibility
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Named triggers must also have the same set of variables in order to be
considered compatible - update the trigger match test to account for
that.
The reason for this requirement is that named triggers with variables
are meant to allow one or more events to set the same variable.
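
For readers unfamiliar with the idiom used in the check added below: "!!a != !!b"
is true when exactly one of two pointers is set, i.e. one trigger defines a
variable on the key and the other does not. A standalone model of the
compatibility test (names invented for the example, not kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Two variable names are compatible when both are unset, or both are set
 * and equal. */
static bool var_names_compatible(const char *a, const char *b)
{
	if (!!a != !!b)		/* exactly one is set -> incompatible */
		return false;
	if (a && strcmp(a, b) != 0)
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", var_names_compatible(NULL, NULL));   /* 1 */
	printf("%d\n", var_names_compatible("ts0", NULL));  /* 0 */
	printf("%d\n", var_names_compatible("ts0", "ts0")); /* 1 */
	return 0;
}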
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1612,7 +1612,7 @@ static int event_hist_trigger_print(stru
sort_key = &hist_data->sort_keys[i];
idx = sort_key->field_idx;
- if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
+ if (WARN_ON(idx >= HIST_FIELDS_MAX))
return -EINVAL;
if (i > 0)
@@ -1800,6 +1800,11 @@ static bool hist_trigger_match(struct ev
return false;
if (key_field->is_signed != key_field_test->is_signed)
return false;
+ if (!!key_field->var.name != !!key_field_test->var.name)
+ return false;
+ if (key_field->var.name &&
+ strcmp(key_field->var.name, key_field_test->var.name) != 0)
+ return false;
}
for (i = 0; i < hist_data->n_sort_keys; i++) {

View File

@ -1,303 +0,0 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:42 -0600
Subject: [PATCH 16/48] tracing: Give event triggers access to
ring_buffer_event
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
The ring_buffer event can provide a timestamp that may be useful to
various triggers - pass it into the handlers for that purpose.
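
The change is mechanical: every trigger callback gains a ring_buffer_event
argument so handlers that want the event's timestamp can reach it, while
callers that have no event pass NULL (as trace_trigger_soft_disabled() does in
the hunk below). A toy model of threading such an extra context pointer through
an ops table, with all names invented for illustration:

#include <stdio.h>

struct event;	/* opaque per-event context, stands in for ring_buffer_event */

struct trigger_ops {
	void (*func)(void *rec, struct event *ev);
};

static void my_trigger(void *rec, struct event *ev)
{
	/* handlers that do not need the event simply ignore it */
	(void)rec;
	(void)ev;
	printf("trigger fired\n");
}

int main(void)
{
	struct trigger_ops ops = { .func = my_trigger };

	ops.func(NULL, NULL);	/* callers without a record or event pass NULL */
	return 0;
}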
Link: http://lkml.kernel.org/r/6de592683b59fa70ffa5d43d0109896623fc1367.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 373514437a6f75b5cfe890742b590f2c12f6c335)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/trace_events.h | 14 ++++++----
kernel/trace/trace.h | 9 +++---
kernel/trace/trace_events_hist.c | 11 +++++---
kernel/trace/trace_events_trigger.c | 47 ++++++++++++++++++++++--------------
4 files changed, 49 insertions(+), 32 deletions(-)
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -430,11 +430,13 @@ enum event_trigger_type {
extern int filter_match_preds(struct event_filter *filter, void *rec);
-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
- void *rec);
-extern void event_triggers_post_call(struct trace_event_file *file,
- enum event_trigger_type tt,
- void *rec);
+extern enum event_trigger_type
+event_triggers_call(struct trace_event_file *file, void *rec,
+ struct ring_buffer_event *event);
+extern void
+event_triggers_post_call(struct trace_event_file *file,
+ enum event_trigger_type tt,
+ void *rec, struct ring_buffer_event *event);
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace
if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
- event_triggers_call(file, NULL);
+ event_triggers_call(file, NULL, NULL);
if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
return true;
if (eflags & EVENT_FILE_FL_PID_FILTER)
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1294,7 +1294,7 @@ static inline bool
unsigned long eflags = file->flags;
if (eflags & EVENT_FILE_FL_TRIGGER_COND)
- *tt = event_triggers_call(file, entry);
+ *tt = event_triggers_call(file, entry, event);
if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
(unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
@@ -1331,7 +1331,7 @@ event_trigger_unlock_commit(struct trace
trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
if (tt)
- event_triggers_post_call(file, tt, entry);
+ event_triggers_post_call(file, tt, entry, event);
}
/**
@@ -1364,7 +1364,7 @@ event_trigger_unlock_commit_regs(struct
irq_flags, pc, regs);
if (tt)
- event_triggers_post_call(file, tt, entry);
+ event_triggers_post_call(file, tt, entry, event);
}
#define FILTER_PRED_INVALID ((unsigned short)-1)
@@ -1589,7 +1589,8 @@ extern int register_trigger_hist_enable_
*/
struct event_trigger_ops {
void (*func)(struct event_trigger_data *data,
- void *rec);
+ void *rec,
+ struct ring_buffer_event *rbe);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -909,7 +909,8 @@ static inline void add_to_key(char *comp
memcpy(compound_key + key_field->offset, key, size);
}
-static void event_hist_trigger(struct event_trigger_data *data, void *rec)
+static void event_hist_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
@@ -1658,7 +1659,8 @@ static struct event_command trigger_hist
}
static void
-hist_enable_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
struct event_trigger_data *test;
@@ -1674,7 +1676,8 @@ hist_enable_trigger(struct event_trigger
}
static void
-hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1682,7 +1685,7 @@ hist_enable_count_trigger(struct event_t
if (data->count != -1)
(data->count)--;
- hist_enable_trigger(data, rec);
+ hist_enable_trigger(data, rec, event);
}
static struct event_trigger_ops hist_enable_trigger_ops = {
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trig
* any trigger that should be deferred, ETT_NONE if nothing to defer.
*/
enum event_trigger_type
-event_triggers_call(struct trace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec,
+ struct ring_buffer_event *event)
{
struct event_trigger_data *data;
enum event_trigger_type tt = ETT_NONE;
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_f
if (data->paused)
continue;
if (!rec) {
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_f
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
}
return tt;
}
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
void
event_triggers_post_call(struct trace_event_file *file,
enum event_trigger_type tt,
- void *rec)
+ void *rec, struct ring_buffer_event *event)
{
struct event_trigger_data *data;
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_ev
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -909,7 +910,8 @@ void set_named_trigger_data(struct event
}
static void
-traceon_trigger(struct event_trigger_data *data, void *rec)
+traceon_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (tracing_is_on())
return;
@@ -918,7 +920,8 @@ traceon_trigger(struct event_trigger_dat
}
static void
-traceon_count_trigger(struct event_trigger_data *data, void *rec)
+traceon_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (tracing_is_on())
return;
@@ -933,7 +936,8 @@ traceon_count_trigger(struct event_trigg
}
static void
-traceoff_trigger(struct event_trigger_data *data, void *rec)
+traceoff_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!tracing_is_on())
return;
@@ -942,7 +946,8 @@ traceoff_trigger(struct event_trigger_da
}
static void
-traceoff_count_trigger(struct event_trigger_data *data, void *rec)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!tracing_is_on())
return;
@@ -1039,13 +1044,15 @@ static struct event_command trigger_trac
#ifdef CONFIG_TRACER_SNAPSHOT
static void
-snapshot_trigger(struct event_trigger_data *data, void *rec)
+snapshot_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
tracing_snapshot();
}
static void
-snapshot_count_trigger(struct event_trigger_data *data, void *rec)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1053,7 +1060,7 @@ snapshot_count_trigger(struct event_trig
if (data->count != -1)
(data->count)--;
- snapshot_trigger(data, rec);
+ snapshot_trigger(data, rec, event);
}
static int
@@ -1141,13 +1148,15 @@ static __init int register_trigger_snaps
#endif
static void
-stacktrace_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
trace_dump_stack(STACK_SKIP);
}
static void
-stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1155,7 +1164,7 @@ stacktrace_count_trigger(struct event_tr
if (data->count != -1)
(data->count)--;
- stacktrace_trigger(data, rec);
+ stacktrace_trigger(data, rec, event);
}
static int
@@ -1217,7 +1226,8 @@ static __init void unregister_trigger_tr
}
static void
-event_enable_trigger(struct event_trigger_data *data, void *rec)
+event_enable_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1228,7 +1238,8 @@ event_enable_trigger(struct event_trigge
}
static void
-event_enable_count_trigger(struct event_trigger_data *data, void *rec)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1242,7 +1253,7 @@ event_enable_count_trigger(struct event_
if (data->count != -1)
(data->count)--;
- event_enable_trigger(data, rec);
+ event_enable_trigger(data, rec, event);
}
int event_enable_trigger_print(struct seq_file *m,

View File

@ -1,33 +0,0 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 29 Mar 2018 15:08:21 -0400
Subject: [PATCH 17/17] d_genocide: move export to definition
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz
Upstream commit cbd4a5bcb25b5ed0c1c64bc969b893cad9b78acc
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3095,6 +3095,8 @@ void d_genocide(struct dentry *parent)
d_walk(parent, parent, d_genocide_kill, NULL);
}
+EXPORT_SYMBOL(d_genocide);
+
void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
inode_dec_link_count(inode);
@@ -3174,8 +3176,6 @@ static void __init dcache_init(void)
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);
-EXPORT_SYMBOL(d_genocide);
-
void __init vfs_caches_init_early(void)
{
int i;

View File

@ -1,187 +0,0 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Wed, 20 Dec 2017 17:13:06 +0100
Subject: [PATCH 17/29] hrtimer: Make hrtimer_reprogram() unconditional
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
hrtimer_reprogram() needs to be available unconditionally for softirq based
hrtimers. Move the function and all required struct members out of the
CONFIG_HIGH_RES_TIMERS #ifdef.
There is no functional change because hrtimer_reprogram() is only invoked
when hrtimer_cpu_base.hres_active is true. Making it unconditional
increases the text size for the CONFIG_HIGH_RES_TIMERS=n case, but avoids
replication of that code for the upcoming softirq based hrtimers support.
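
A condensed illustration (hypothetical names, not the kernel code) of the shape
this series moves towards: one unconditional function definition whose callers
are guarded by a runtime flag, instead of a compile-time stub under an #ifdef:

#include <stdbool.h>
#include <stdio.h>

static bool hres_active;	/* runtime flag, stands in for cpu_base->hres_active */

/* Always compiled in; harmless because it is only reached when the flag is set. */
static void reprogram(void)
{
	printf("reprogram timer hardware\n");
}

static void enqueue_first_expiring_timer(void)
{
	if (!hres_active)	/* runtime guard replaces the compile-time one */
		return;
	reprogram();
}

int main(void)
{
	enqueue_first_expiring_timer();	/* no-op */
	hres_active = true;
	enqueue_first_expiring_timer();	/* reprograms */
	return 0;
}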
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/hrtimer.h | 6 +-
kernel/time/hrtimer.c | 129 +++++++++++++++++++++++-------------------------
2 files changed, 65 insertions(+), 70 deletions(-)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -182,10 +182,10 @@ struct hrtimer_cpu_base {
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
- unsigned int hres_active : 1;
-#ifdef CONFIG_HIGH_RES_TIMERS
- unsigned int in_hrtirq : 1,
+ unsigned int hres_active : 1,
+ in_hrtirq : 1,
hang_detected : 1;
+#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int nr_events;
unsigned short nr_retries;
unsigned short nr_hangs;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -582,68 +582,6 @@ hrtimer_force_reprogram(struct hrtimer_c
}
/*
- * When a timer is enqueued and expires earlier than the already enqueued
- * timers, we have to check, whether it expires earlier than the timer for
- * which the clock event device was armed.
- *
- * Called with interrupts disabled and base->cpu_base.lock held
- */
-static void hrtimer_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
-{
- struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-
- WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
-
- /*
- * If the timer is not on the current cpu, we cannot reprogram
- * the other cpus clock event device.
- */
- if (base->cpu_base != cpu_base)
- return;
-
- /*
- * If the hrtimer interrupt is running, then it will
- * reevaluate the clock bases and reprogram the clock event
- * device. The callbacks are always executed in hard interrupt
- * context so we don't need an extra check for a running
- * callback.
- */
- if (cpu_base->in_hrtirq)
- return;
-
- /*
- * CLOCK_REALTIME timer might be requested with an absolute
- * expiry time which is less than base->offset. Set it to 0.
- */
- if (expires < 0)
- expires = 0;
-
- if (expires >= cpu_base->expires_next)
- return;
-
- /* Update the pointer to the next expiring timer */
- cpu_base->next_timer = timer;
-
- /*
- * If a hang was detected in the last timer interrupt then we
- * do not schedule a timer which is earlier than the expiry
- * which we enforced in the hang detection. We want the system
- * to make progress.
- */
- if (cpu_base->hang_detected)
- return;
-
- /*
- * Program the timer hardware. We enforce the expiry for
- * events which are already in the past.
- */
- cpu_base->expires_next = expires;
- tick_program_event(expires, 1);
-}
-
-/*
* Retrigger next event is called after clock was set
*
* Called with interrupts disabled via on_each_cpu()
@@ -703,16 +641,73 @@ static inline int hrtimer_is_hres_enable
static inline void hrtimer_switch_to_hres(void) { }
static inline void
hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
-static inline int hrtimer_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
-{
- return 0;
-}
static inline void retrigger_next_event(void *arg) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
/*
+ * When a timer is enqueued and expires earlier than the already enqueued
+ * timers, we have to check, whether it expires earlier than the timer for
+ * which the clock event device was armed.
+ *
+ * Called with interrupts disabled and base->cpu_base.lock held
+ */
+static void hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+ ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+
+ WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
+
+ /*
+ * If the timer is not on the current cpu, we cannot reprogram
+ * the other cpus clock event device.
+ */
+ if (base->cpu_base != cpu_base)
+ return;
+
+ /*
+ * If the hrtimer interrupt is running, then it will
+ * reevaluate the clock bases and reprogram the clock event
+ * device. The callbacks are always executed in hard interrupt
+ * context so we don't need an extra check for a running
+ * callback.
+ */
+ if (cpu_base->in_hrtirq)
+ return;
+
+ /*
+ * CLOCK_REALTIME timer might be requested with an absolute
+ * expiry time which is less than base->offset. Set it to 0.
+ */
+ if (expires < 0)
+ expires = 0;
+
+ if (expires >= cpu_base->expires_next)
+ return;
+
+ /* Update the pointer to the next expiring timer */
+ cpu_base->next_timer = timer;
+
+ /*
+ * If a hang was detected in the last timer interrupt then we
+ * do not schedule a timer which is earlier than the expiry
+ * which we enforced in the hang detection. We want the system
+ * to make progress.
+ */
+ if (cpu_base->hang_detected)
+ return;
+
+ /*
+ * Program the timer hardware. We enforce the expiry for
+ * events which are already in the past.
+ */
+ cpu_base->expires_next = expires;
+ tick_program_event(expires, 1);
+}
+
+/*
* Clock realtime was set
*
* Change the offset of the realtime clock vs. the monotonic

Some files were not shown because too many files have changed in this diff