Subject: ipc/sem: Rework semaphore wakeups
From: Peter Zijlstra
Date: Wed, 14 Sep 2011 11:57:04 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patches-3.10.4-rt1.tar.xz

Subject: ipc/sem: Rework semaphore wakeups
From: Peter Zijlstra
Date: Tue Sep 13 15:09:40 CEST 2011

Current sysv sems have a weird-ass wakeup scheme that involves keeping
preemption disabled over a potential O(n^2) loop and busy-waiting on
that on other CPUs.

Kill this and simply wake the task directly from under the sem_lock.

This was discovered by a migrate_disable() debug feature that
disallows:

	spin_lock();
	preempt_disable();
	spin_unlock();
	preempt_enable();

Suggested-by: Thomas Gleixner
Reported-by: Mike Galbraith
Signed-off-by: Peter Zijlstra
Cc: Manfred Spraul
Link: http://lkml.kernel.org/r/1315994224.5040.1.camel@twins
Signed-off-by: Thomas Gleixner
---
 ipc/sem.c |   24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

Index: linux-stable/ipc/sem.c
===================================================================
--- linux-stable.orig/ipc/sem.c
+++ linux-stable/ipc/sem.c
@@ -155,7 +155,7 @@ static int sysvipc_sem_proc_show(struct
  *	sem_array.sem_pending{,last},
  *	sem_array.sem_undo: sem_lock() for read/write
  *	sem_undo.proc_next: only "current" is allowed to read/write that field.
- *	
+ *
  */
 
 #define sc_semmsl	sem_ctls[0]
@@ -498,7 +498,7 @@ static int try_atomic_semop (struct sem_
 		curr = sma->sem_base + sop->sem_num;
 		sem_op = sop->sem_op;
 		result = curr->semval;
-  
+
 		if (!sem_op && result)
 			goto would_block;
 
@@ -525,7 +525,7 @@ static int try_atomic_semop (struct sem_
 			un->semadj[sop->sem_num] -= sop->sem_op;
 		sop--;
 	}
-	
+
 	return 0;
 
 out_of_range:
@@ -557,6 +557,13 @@ undo:
 static void wake_up_sem_queue_prepare(struct list_head *pt,
 				struct sem_queue *q, int error)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+	struct task_struct *p = q->sleeper;
+	get_task_struct(p);
+	q->status = error;
+	wake_up_process(p);
+	put_task_struct(p);
+#else
 	if (list_empty(pt)) {
 		/*
 		 * Hold preempt off so that we don't get preempted and have the
@@ -568,6 +575,7 @@ static void wake_up_sem_queue_prepare(st
 	q->pid = error;
 
 	list_add_tail(&q->list, pt);
+#endif
 }
 
 /**
@@ -581,6 +589,7 @@ static void wake_up_sem_queue_prepare(st
  */
 static void wake_up_sem_queue_do(struct list_head *pt)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	struct sem_queue *q, *t;
 	int did_something;
 
@@ -593,6 +602,7 @@ static void wake_up_sem_queue_do(struct
 	}
 	if (did_something)
 		preempt_enable();
+#endif
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -947,7 +957,7 @@ static int semctl_nolock(struct ipc_name
 		err = security_sem_semctl(NULL, cmd);
 		if (err)
 			return err;
-		
+
 		memset(&seminfo,0,sizeof(seminfo));
 		seminfo.semmni = ns->sc_semmni;
 		seminfo.semmns = ns->sc_semmns;
@@ -967,7 +977,7 @@ static int semctl_nolock(struct ipc_name
 	}
 	max_id = ipc_get_maxid(&sem_ids(ns));
 	up_read(&sem_ids(ns).rw_mutex);
-	if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 
+	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 		return -EFAULT;
 	return (max_id < 0) ? 0: max_id;
 }
@@ -1642,7 +1652,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
 	 */
-	
+
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
@@ -1765,7 +1775,7 @@ int copy_semundo(unsigned long clone_fla
 			return error;
 		atomic_inc(&undo_list->refcnt);
 		tsk->sysvsem.undo_list = undo_list;
-	} else 
+	} else
 		tsk->sysvsem.undo_list = NULL;
 
 	return 0;
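
For context, the !RT scheme retained under #else works roughly as sketched
below. This is simplified from the 3.10 ipc/sem.c call chain and is not part
of the patch; exact arguments are elided:

	sem_lock(sma, sops, nsops);
	do_smart_update(sma, sops, nsops, 1, &tasks);
		/* potentially O(n^2) scan of the pending queues; the first
		 * wake_up_sem_queue_prepare() call does preempt_disable()
		 * and marks each queue entry IN_WAKEUP */
	sem_unlock(sma, locknum);
	wake_up_sem_queue_do(&tasks);
		/* wake_up_process() each sleeper, then preempt_enable() */

Preemption is held from inside the locked section until after the unlock,
which is exactly the spin_lock(); preempt_disable(); spin_unlock();
preempt_enable() interleaving that the migrate_disable() debug feature
rejects on RT, where spinlocks are sleeping locks.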
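
The reason the waker must not be preempted inside that window is the sleeper
side, which busy-waits while its queue entry is marked IN_WAKEUP. A minimal
sketch, modeled on get_queue_result() in 3.10 ipc/sem.c:

	static int get_queue_result(struct sem_queue *q)
	{
		int error = q->status;

		/* Busy-wait until the waker publishes the final status. */
		while (unlikely(error == IN_WAKEUP)) {
			cpu_relax();
			error = q->status;
		}
		return error;
	}

On RT this spinning never happens: wake_up_sem_queue_prepare() writes the
final status directly. The get_task_struct()/put_task_struct() pair around
wake_up_process() is needed because once q->status holds the final value the
sleeper may return and its task could exit; the reference keeps the
task_struct valid across the wakeup.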