From b895a9e6ffef5bcb3f28d209398d7e5932fa63b0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 27 Sep 2011 13:56:50 -0400
Subject: [PATCH 177/279] ring-buffer: Convert reader_lock from raw_spin_lock
 into spin_lock

The reader_lock is mostly taken in normal context with interrupts enabled.
But because ftrace_dump() can happen anywhere, it is used as a raw spinlock
and in some cases a check to in_nmi() is performed to determine if the
ftrace_dump() was initiated from an NMI and, if it is, the lock is not taken.

But having the lock as a raw_spin_lock() causes issues with the real-time
kernel, as the lock is held during allocation and freeing of the buffer.
Since memory locks convert into mutexes on PREEMPT_RT, keeping the
reader_lock as a raw_spin_lock causes problems.

Converting the reader_lock is not straightforward, as we must still deal
with ftrace_dump() happening not only from an NMI but also from true
interrupt context on PREEMPT_RT.

Two wrapper functions are created to take and release the reader lock:

  int read_buffer_lock(cpu_buffer, unsigned long *flags)
  void read_buffer_unlock(cpu_buffer, unsigned long flags, int locked)

read_buffer_lock() disables interrupts, updates *flags, and returns 1 if
it actually took the lock. The only time it returns 0 is when ftrace_dump()
happens in an unsafe context.

read_buffer_unlock() checks the locked argument and unlocks the spin lock
only if it was successfully taken.

Instead of handling this only in the specific paths that an NMI might call
into, all users of the reader_lock are converted to the wrapper functions,
which makes the code simpler to read and less error prone.
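
A typical converted call site then looks like this (a minimal sketch of
the pattern repeated throughout the diff below; the surrounding function
body is elided):

	unsigned long flags;
	int locked;

	/* returns 0, without locking, when called from an unsafe context */
	locked = read_buffer_lock(cpu_buffer, &flags);
	/* ... access the reader page ... */
	read_buffer_unlock(cpu_buffer, flags, locked);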

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Clark Williams <clark@redhat.com>
Link: http://lkml.kernel.org/r/1317146210.26514.33.camel@gandalf.stny.rr.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/trace/ring_buffer.c | 151 ++++++++++++++++++++++++--------------------
 1 file changed, 81 insertions(+), 70 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f5b7b5c..354017f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -478,7 +478,7 @@ struct ring_buffer_per_cpu {
 	int				cpu;
 	atomic_t			record_disabled;
 	struct ring_buffer		*buffer;
-	raw_spinlock_t			reader_lock;	/* serialize readers */
+	spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		*pages;
@@ -1049,6 +1049,44 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	return -ENOMEM;
 }

+static inline int ok_to_lock(void)
+{
+	if (in_nmi())
+		return 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (in_atomic())
+		return 0;
+#endif
+	return 1;
+}
+
+static int
+read_buffer_lock(struct ring_buffer_per_cpu *cpu_buffer,
+		 unsigned long *flags)
+{
+	/*
+	 * If an NMI die dumps out the content of the ring buffer
+	 * do not grab locks. We also permanently disable the ring
+	 * buffer too. A one time deal is all you get from reading
+	 * the ring buffer from an NMI.
+	 */
+	if (!ok_to_lock()) {
+		if (spin_trylock_irqsave(&cpu_buffer->reader_lock, *flags))
+			return 1;
+		tracing_off_permanent();
+		return 0;
+	}
+	spin_lock_irqsave(&cpu_buffer->reader_lock, *flags);
+	return 1;
+}
+
+static void
+read_buffer_unlock(struct ring_buffer_per_cpu *cpu_buffer,
+		   unsigned long flags, int locked)
+{
+	if (locked)
+		spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
 static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
@@ -1064,7 +1102,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
-	raw_spin_lock_init(&cpu_buffer->reader_lock);
+	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
@@ -1259,9 +1297,11 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 {
 	struct buffer_page *bpage;
 	struct list_head *p;
+	unsigned long flags;
 	unsigned i;
+	int locked;
 
-	raw_spin_lock_irq(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	rb_head_page_deactivate(cpu_buffer);
 
 	for (i = 0; i < nr_pages; i++) {
@@ -1279,7 +1319,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	rb_check_pages(cpu_buffer);
 
 out:
-	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 
 static void
@@ -1288,9 +1328,11 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct buffer_page *bpage;
 	struct list_head *p;
+	unsigned long flags;
 	unsigned i;
+	int locked;
 
-	raw_spin_lock_irq(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	rb_head_page_deactivate(cpu_buffer);
 
 	for (i = 0; i < nr_pages; i++) {
@@ -1305,7 +1347,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_check_pages(cpu_buffer);
 
 out:
-	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 
 /**
@@ -2689,7 +2731,7 @@ unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	/*
 	 * if the tail is on reader_page, oldest time stamp is on the reader
 	 * page
@@ -2699,7 +2741,7 @@ unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 	else
 		bpage = rb_set_head_page(cpu_buffer);
 	ret = bpage->page->time_stamp;
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return ret;
 }
@@ -2863,15 +2905,16 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
+	int locked;
 
 	if (!iter)
 		return;
 
 	cpu_buffer = iter->cpu_buffer;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	rb_iter_reset(iter);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
 
@@ -3289,21 +3332,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
-static inline int rb_ok_to_lock(void)
-{
-	/*
-	 * If an NMI die dumps out the content of the ring buffer
-	 * do not grab locks. We also permanently disable the ring
-	 * buffer too. A one time deal is all you get from reading
-	 * the ring buffer from an NMI.
-	 */
-	if (likely(!in_nmi()))
-		return 1;
-
-	tracing_off_permanent();
-	return 0;
-}
-
 /**
  * ring_buffer_peek - peek at the next event to be read
  * @buffer: The ring buffer to read
@@ -3321,22 +3349,17 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	unsigned long flags;
-	int dolock;
+	int locked;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
-	dolock = rb_ok_to_lock();
 again:
-	local_irq_save(flags);
-	if (dolock)
-		raw_spin_lock(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
-	if (dolock)
-		raw_spin_unlock(&cpu_buffer->reader_lock);
-	local_irq_restore(flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		goto again;
@@ -3358,11 +3381,12 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 	struct ring_buffer_event *event;
 	unsigned long flags;
+	int locked;
 
 again:
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	event = rb_iter_peek(iter, ts);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		goto again;
@@ -3388,9 +3412,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
 	unsigned long flags;
-	int dolock;
-
-	dolock = rb_ok_to_lock();
+	int locked;
 
 again:
 	/* might be called in atomic */
@@ -3400,9 +3422,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	local_irq_save(flags);
-	if (dolock)
-		raw_spin_lock(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 
 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event) {
@@ -3410,9 +3430,8 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 		rb_advance_reader(cpu_buffer);
 	}
 
-	if (dolock)
-		raw_spin_unlock(&cpu_buffer->reader_lock);
-	local_irq_restore(flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
+
 
 out:
 	preempt_enable();
@@ -3497,17 +3516,18 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
+	int locked;
 
 	if (!iter)
 		return;
 
 	cpu_buffer = iter->cpu_buffer;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
@@ -3541,8 +3561,9 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 	unsigned long flags;
+	int locked;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 again:
 	event = rb_iter_peek(iter, ts);
 	if (!event)
@@ -3553,7 +3574,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 
 	rb_advance_iter(iter);
 out:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	return event;
 }
@@ -3618,13 +3639,14 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	unsigned long flags;
+	int locked;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	atomic_inc(&cpu_buffer->record_disabled);
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 
 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
 		goto out;
@@ -3636,7 +3658,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	arch_spin_unlock(&cpu_buffer->lock);
 
 out:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	atomic_dec(&cpu_buffer->record_disabled);
 }
@@ -3663,22 +3685,16 @@ int ring_buffer_empty(struct ring_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
-	int dolock;
+	int locked;
 	int cpu;
 	int ret;
 
-	dolock = rb_ok_to_lock();
-
 	/* yes this is racy, but if you don't like the race, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		local_irq_save(flags);
-		if (dolock)
-			raw_spin_lock(&cpu_buffer->reader_lock);
+		locked = read_buffer_lock(cpu_buffer, &flags);
 		ret = rb_per_cpu_empty(cpu_buffer);
-		if (dolock)
-			raw_spin_unlock(&cpu_buffer->reader_lock);
-		local_irq_restore(flags);
+		read_buffer_unlock(cpu_buffer, flags, locked);
 
 		if (!ret)
 			return 0;
@@ -3697,22 +3713,16 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
-	int dolock;
+	int locked;
 	int ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
-	dolock = rb_ok_to_lock();
-
 	cpu_buffer = buffer->buffers[cpu];
-	local_irq_save(flags);
-	if (dolock)
-		raw_spin_lock(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	ret = rb_per_cpu_empty(cpu_buffer);
-	if (dolock)
-		raw_spin_unlock(&cpu_buffer->reader_lock);
-	local_irq_restore(flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	return ret;
 }
@@ -3887,6 +3897,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	unsigned int commit;
 	unsigned int read;
 	u64 save_timestamp;
+	int locked;
 	int ret = -1;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -3908,7 +3919,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	if (!bpage)
 		goto out;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 
 	reader = rb_get_reader_page(cpu_buffer);
 	if (!reader)
@@ -4032,7 +4043,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
 
 out_unlock:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 out:
 	return ret;