2018-08-27 14:32:32 +00:00
|
|
|
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
|
|
|
|
Date: Mon, 9 Jul 2018 17:48:54 -0400
|
|
|
|
Subject: [PATCH] cgroup/tracing: Move taking of spin lock out of trace event
|
|
|
|
handlers
|
|
|
|
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18.7-rt5.tar.xz
|
|
|
|
|
|
|
|
[ Upstream commit e4f8d81c738db6d3ffdabfb8329aa2feaa310699 ]
|
|
|
|
|
|
|
|
It is unwise to take spin locks from the handlers of trace events.
|
|
|
|
Mainly, because they can introduce lockups, because it introduces locks
|
|
|
|
in places that are normally not tested. Worse yet, because trace events
|
|
|
|
are tucked away in the include/trace/events/ directory, locks that are
|
|
|
|
taken there are forgotten about.
|
|
|
|
|
|
|
|
As a general rule, I tell people never to take any locks in a trace
|
|
|
|
event handler.
|
|
|
|
|
|
|
|
Several cgroup trace event handlers call cgroup_path() which eventually
|
|
|
|
takes the kernfs_rename_lock spinlock. This injects the spinlock in the
|
|
|
|
code without people realizing it. It also can cause issues for the
|
|
|
|
PREEMPT_RT patch, as the spinlock becomes a mutex, and the trace event
|
|
|
|
handlers are called with preemption disabled.
|
|
|
|
|
|
|
|
By moving the calculation of the cgroup_path() out of the trace event
|
|
|
|
handlers and into a macro (surrounded by a
|
|
|
|
trace_cgroup_##type##_enabled()), then we could place the cgroup_path
|
|
|
|
into a string, and pass that to the trace event. Not only does this
|
|
|
|
remove the taking of the spinlock out of the trace event handler, but
|
|
|
|
it also means that the cgroup_path() only needs to be called once (it
|
|
|
|
is currently called twice, once to get the length to reserve the
|
|
|
|
buffer for, and once again to get the path itself). Now it only needs to
|
|
|
|
be done once.
|
|
|
|
|
|
|
|
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
|
|
|
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
|
|
|
|
Signed-off-by: Tejun Heo <tj@kernel.org>
|
|
|
|
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
|
|
|
---
|
|
|
|
include/trace/events/cgroup.h | 47 +++++++++++++++++++---------------------
|
|
|
|
kernel/cgroup/cgroup-internal.h | 26 ++++++++++++++++++++++
|
|
|
|
kernel/cgroup/cgroup-v1.c | 4 +--
|
|
|
|
kernel/cgroup/cgroup.c | 12 +++++-----
|
|
|
|
4 files changed, 58 insertions(+), 31 deletions(-)
|
|
|
|
|
|
|
|
--- a/include/trace/events/cgroup.h
|
|
|
|
+++ b/include/trace/events/cgroup.h
|
|
|
|
@@ -53,24 +53,22 @@ DEFINE_EVENT(cgroup_root, cgroup_remount
|
|
|
|
|
|
|
|
DECLARE_EVENT_CLASS(cgroup,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *cgrp),
|
|
|
|
+ TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
|
|
|
|
- TP_ARGS(cgrp),
|
|
|
|
+ TP_ARGS(cgrp, path),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__field( int, root )
|
|
|
|
__field( int, id )
|
|
|
|
__field( int, level )
|
|
|
|
- __dynamic_array(char, path,
|
|
|
|
- cgroup_path(cgrp, NULL, 0) + 1)
|
|
|
|
+ __string( path, path )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
__entry->root = cgrp->root->hierarchy_id;
|
|
|
|
__entry->id = cgrp->id;
|
|
|
|
__entry->level = cgrp->level;
|
|
|
|
- cgroup_path(cgrp, __get_dynamic_array(path),
|
|
|
|
- __get_dynamic_array_len(path));
|
|
|
|
+ __assign_str(path, path);
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("root=%d id=%d level=%d path=%s",
|
|
|
|
@@ -79,45 +77,45 @@ DECLARE_EVENT_CLASS(cgroup,
|
|
|
|
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_mkdir,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *cgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
|
|
|
|
- TP_ARGS(cgroup)
|
|
|
|
+ TP_ARGS(cgrp, path)
|
|
|
|
);
|
|
|
|
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_rmdir,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *cgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
|
|
|
|
- TP_ARGS(cgroup)
|
|
|
|
+ TP_ARGS(cgrp, path)
|
|
|
|
);
|
|
|
|
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_release,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *cgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
|
|
|
|
- TP_ARGS(cgroup)
|
|
|
|
+ TP_ARGS(cgrp, path)
|
|
|
|
);
|
|
|
|
|
|
|
|
DEFINE_EVENT(cgroup, cgroup_rename,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *cgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *cgrp, const char *path),
|
|
|
|
|
|
|
|
- TP_ARGS(cgroup)
|
|
|
|
+ TP_ARGS(cgrp, path)
|
|
|
|
);
|
|
|
|
|
|
|
|
DECLARE_EVENT_CLASS(cgroup_migrate,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
|
|
|
+ struct task_struct *task, bool threadgroup),
|
|
|
|
|
|
|
|
- TP_ARGS(dst_cgrp, task, threadgroup),
|
|
|
|
+ TP_ARGS(dst_cgrp, path, task, threadgroup),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__field( int, dst_root )
|
|
|
|
__field( int, dst_id )
|
|
|
|
__field( int, dst_level )
|
|
|
|
- __dynamic_array(char, dst_path,
|
|
|
|
- cgroup_path(dst_cgrp, NULL, 0) + 1)
|
|
|
|
__field( int, pid )
|
|
|
|
+ __string( dst_path, path )
|
|
|
|
__string( comm, task->comm )
|
|
|
|
),
|
|
|
|
|
|
|
|
@@ -125,8 +123,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
|
|
|
|
__entry->dst_root = dst_cgrp->root->hierarchy_id;
|
|
|
|
__entry->dst_id = dst_cgrp->id;
|
|
|
|
__entry->dst_level = dst_cgrp->level;
|
|
|
|
- cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
|
|
|
|
- __get_dynamic_array_len(dst_path));
|
|
|
|
+ __assign_str(dst_path, path);
|
|
|
|
__entry->pid = task->pid;
|
|
|
|
__assign_str(comm, task->comm);
|
|
|
|
),
|
|
|
|
@@ -138,16 +135,18 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
|
|
|
|
|
|
|
|
DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
|
|
|
+ struct task_struct *task, bool threadgroup),
|
|
|
|
|
|
|
|
- TP_ARGS(dst_cgrp, task, threadgroup)
|
|
|
|
+ TP_ARGS(dst_cgrp, path, task, threadgroup)
|
|
|
|
);
|
|
|
|
|
|
|
|
DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
|
|
|
|
|
|
|
|
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
|
|
|
|
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
|
|
|
|
+ struct task_struct *task, bool threadgroup),
|
|
|
|
|
|
|
|
- TP_ARGS(dst_cgrp, task, threadgroup)
|
|
|
|
+ TP_ARGS(dst_cgrp, path, task, threadgroup)
|
|
|
|
);
|
|
|
|
|
|
|
|
#endif /* _TRACE_CGROUP_H */
|
|
|
|
--- a/kernel/cgroup/cgroup-internal.h
|
|
|
|
+++ b/kernel/cgroup/cgroup-internal.h
|
|
|
|
@@ -8,6 +8,32 @@
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/refcount.h>
|
|
|
|
|
|
|
|
+#define TRACE_CGROUP_PATH_LEN 1024
|
|
|
|
+extern spinlock_t trace_cgroup_path_lock;
|
|
|
|
+extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * cgroup_path() takes a spin lock. It is good practice not to take
|
|
|
|
+ * spin locks within trace point handlers, as they are mostly hidden
|
|
|
|
+ * from normal view. As cgroup_path() can take the kernfs_rename_lock
|
|
|
|
+ * spin lock, it is best to not call that function from the trace event
|
|
|
|
+ * handler.
|
|
|
|
+ *
|
|
|
|
+ * Note: trace_cgroup_##type##_enabled() is a static branch that will only
|
|
|
|
+ * be set when the trace event is enabled.
|
|
|
|
+ */
|
|
|
|
+#define TRACE_CGROUP_PATH(type, cgrp, ...) \
|
|
|
|
+ do { \
|
|
|
|
+ if (trace_cgroup_##type##_enabled()) { \
|
|
|
|
+ spin_lock(&trace_cgroup_path_lock); \
|
|
|
|
+ cgroup_path(cgrp, trace_cgroup_path, \
|
|
|
|
+ TRACE_CGROUP_PATH_LEN); \
|
|
|
|
+ trace_cgroup_##type(cgrp, trace_cgroup_path, \
|
|
|
|
+ ##__VA_ARGS__); \
|
|
|
|
+ spin_unlock(&trace_cgroup_path_lock); \
|
|
|
|
+ } \
|
|
|
|
+ } while (0)
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* A cgroup can be associated with multiple css_sets as different tasks may
|
|
|
|
* belong to different cgroups on different hierarchies. In the other
|
|
|
|
--- a/kernel/cgroup/cgroup-v1.c
|
|
|
|
+++ b/kernel/cgroup/cgroup-v1.c
|
|
|
|
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup
|
|
|
|
if (task) {
|
|
|
|
ret = cgroup_migrate(task, false, &mgctx);
|
|
|
|
if (!ret)
|
|
|
|
- trace_cgroup_transfer_tasks(to, task, false);
|
|
|
|
+ TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
|
|
|
|
put_task_struct(task);
|
|
|
|
}
|
|
|
|
} while (task && !ret);
|
|
|
|
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_
|
|
|
|
|
|
|
|
ret = kernfs_rename(kn, new_parent, new_name_str);
|
|
|
|
if (!ret)
|
|
|
|
- trace_cgroup_rename(cgrp);
|
|
|
|
+ TRACE_CGROUP_PATH(rename, cgrp);
|
|
|
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
|
|
|
|
|
--- a/kernel/cgroup/cgroup.c
|
|
|
|
+++ b/kernel/cgroup/cgroup.c
|
|
|
|
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
|
|
|
|
EXPORT_SYMBOL_GPL(css_set_lock);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+DEFINE_SPINLOCK(trace_cgroup_path_lock);
|
|
|
|
+char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* Protects cgroup_idr and css_idr so that IDs can be released without
|
|
|
|
* grabbing cgroup_mutex.
|
|
|
|
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *ds
|
|
|
|
cgroup_migrate_finish(&mgctx);
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
- trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
|
|
|
|
+ TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
@@ -4634,7 +4637,7 @@ static void css_release_work_fn(struct w
|
|
|
|
struct cgroup *tcgrp;
|
|
|
|
|
|
|
|
/* cgroup release path */
|
|
|
|
- trace_cgroup_release(cgrp);
|
|
|
|
+ TRACE_CGROUP_PATH(release, cgrp);
|
|
|
|
|
|
|
|
if (cgroup_on_dfl(cgrp))
|
|
|
|
cgroup_rstat_flush(cgrp);
|
|
|
|
@@ -4977,7 +4980,7 @@ int cgroup_mkdir(struct kernfs_node *par
|
|
|
|
if (ret)
|
|
|
|
goto out_destroy;
|
|
|
|
|
|
|
|
- trace_cgroup_mkdir(cgrp);
|
|
|
|
+ TRACE_CGROUP_PATH(mkdir, cgrp);
|
|
|
|
|
|
|
|
/* let's create and online css's */
|
|
|
|
kernfs_activate(kn);
|
|
|
|
@@ -5165,9 +5168,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ret = cgroup_destroy_locked(cgrp);
|
|
|
|
-
|
|
|
|
if (!ret)
|
|
|
|
- trace_cgroup_rmdir(cgrp);
|
|
|
|
+ TRACE_CGROUP_PATH(rmdir, cgrp);
|
|
|
|
|
|
|
|
cgroup_kn_unlock(kn);
|
|
|
|
return ret;
|