Subject: mm-slab-wrap-functions.patch From: Thomas Gleixner Date: Sat, 18 Jun 2011 19:44:43 +0200 Signed-off-by: Thomas Gleixner --- mm/slab.c | 154 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 49 deletions(-) Index: linux-stable/mm/slab.c =================================================================== --- linux-stable.orig/mm/slab.c +++ linux-stable/mm/slab.c @@ -117,6 +117,7 @@ #include #include #include +#include #include @@ -703,12 +704,40 @@ static void slab_set_debugobj_lock_class #endif static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); +static DEFINE_LOCAL_IRQ_LOCK(slab_lock); + +#ifndef CONFIG_PREEMPT_RT_BASE +# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1) +#else +/* + * execute func() for all CPUs. On PREEMPT_RT we don't actually have + * to run on the remote CPUs - we only have to take their CPU-locks. + * (This is a rare operation, so cacheline bouncing is not an issue.) + */ +static void +slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) +{ + unsigned int i; + + for_each_online_cpu(i) { + spin_lock_irq(&per_cpu(slab_lock, i).lock); + func(arg, i); + spin_unlock_irq(&per_cpu(slab_lock, i).lock); + } +} +#endif static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) { return cachep->array[smp_processor_id()]; } +static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep, + int cpu) +{ + return cachep->array[cpu]; +} + static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) { @@ -1175,9 +1204,10 @@ static void reap_alien(struct kmem_cache if (l3->alien) { struct array_cache *ac = l3->alien[node]; - if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { + if (ac && ac->avail && + local_spin_trylock_irq(slab_lock, &ac->lock)) { __drain_alien_cache(cachep, ac, node); - spin_unlock_irq(&ac->lock); + local_spin_unlock_irq(slab_lock, &ac->lock); } } } @@ -1192,9 +1222,9 @@ static void 
drain_alien_cache(struct kme for_each_online_node(i) { ac = alien[i]; if (ac) { - spin_lock_irqsave(&ac->lock, flags); + local_spin_lock_irqsave(slab_lock, &ac->lock, flags); __drain_alien_cache(cachep, ac, i); - spin_unlock_irqrestore(&ac->lock, flags); + local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags); } } } @@ -1273,11 +1303,11 @@ static int init_cache_nodelists_node(int cachep->nodelists[node] = l3; } - spin_lock_irq(&cachep->nodelists[node]->list_lock); + local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock); cachep->nodelists[node]->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock); } return 0; } @@ -1302,7 +1332,7 @@ static void __cpuinit cpuup_canceled(lon if (!l3) goto free_array_cache; - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; @@ -1310,7 +1340,7 @@ static void __cpuinit cpuup_canceled(lon free_block(cachep, nc->entry, nc->avail, node); if (!cpumask_empty(mask)) { - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); goto free_array_cache; } @@ -1324,7 +1354,7 @@ static void __cpuinit cpuup_canceled(lon alien = l3->alien; l3->alien = NULL; - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); kfree(shared); if (alien) { @@ -1398,7 +1428,7 @@ static int __cpuinit cpuup_prepare(long l3 = cachep->nodelists[node]; BUG_ON(!l3); - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); if (!l3->shared) { /* * We are serialised from CPU_DEAD or @@ -1413,7 +1443,7 @@ static int __cpuinit cpuup_prepare(long alien = NULL; } #endif - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); kfree(shared); free_alien_cache(alien); if (cachep->flags & 
SLAB_DEBUG_OBJECTS) @@ -1604,6 +1634,8 @@ void __init kmem_cache_init(void) if (num_possible_nodes() == 1) use_alien_caches = 0; + local_irq_lock_init(slab_lock); + for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); if (i < MAX_NUMNODES) @@ -2614,7 +2646,7 @@ __kmem_cache_create (const char *name, s #if DEBUG static void check_irq_off(void) { - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); } static void check_irq_on(void) @@ -2649,26 +2681,37 @@ static void drain_array(struct kmem_cach struct array_cache *ac, int force, int node); -static void do_drain(void *arg) +static void __do_drain(void *arg, unsigned int cpu) { struct kmem_cache *cachep = arg; struct array_cache *ac; - int node = numa_mem_id(); + int node = cpu_to_mem(cpu); - check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get_on_cpu(cachep, cpu); spin_lock(&cachep->nodelists[node]->list_lock); free_block(cachep, ac->entry, ac->avail, node); spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } +#ifndef CONFIG_PREEMPT_RT_BASE +static void do_drain(void *arg) +{ + __do_drain(arg, smp_processor_id()); +} +#else +static void do_drain(void *arg, int this_cpu) +{ + __do_drain(arg, this_cpu); +} +#endif + static void drain_cpu_caches(struct kmem_cache *cachep) { struct kmem_list3 *l3; int node; - on_each_cpu(do_drain, cachep, 1); + slab_on_each_cpu(do_drain, cachep); check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; @@ -2699,10 +2742,10 @@ static int drain_freelist(struct kmem_ca nr_freed = 0; while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); p = l3->slabs_free.prev; if (p == &l3->slabs_free) { - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); goto out; } @@ -2716,7 +2759,7 @@ static int drain_freelist(struct kmem_ca * to the cache. 
*/ l3->free_objects -= cache->num; - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); slab_destroy(cache, slabp); nr_freed++; } @@ -3011,7 +3054,7 @@ static int cache_grow(struct kmem_cache offset *= cachep->colour_off; if (local_flags & __GFP_WAIT) - local_irq_enable(); + local_unlock_irq(slab_lock); /* * The test for missing atomic flag is performed here, rather than @@ -3041,7 +3084,7 @@ static int cache_grow(struct kmem_cache cache_init_objs(cachep, slabp); if (local_flags & __GFP_WAIT) - local_irq_disable(); + local_lock_irq(slab_lock); check_irq_off(); spin_lock(&l3->list_lock); @@ -3055,7 +3098,7 @@ opps1: kmem_freepages(cachep, objp); failed: if (local_flags & __GFP_WAIT) - local_irq_disable(); + local_lock_irq(slab_lock); return 0; } @@ -3469,11 +3512,11 @@ retry: * set and go into memory reserves if necessary. */ if (local_flags & __GFP_WAIT) - local_irq_enable(); + local_unlock_irq(slab_lock); kmem_flagcheck(cache, flags); obj = kmem_getpages(cache, local_flags, numa_mem_id()); if (local_flags & __GFP_WAIT) - local_irq_disable(); + local_lock_irq(slab_lock); if (obj) { /* * Insert into the appropriate per node queues @@ -3591,7 +3634,7 @@ __cache_alloc_node(struct kmem_cache *ca return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); + local_lock_irqsave(slab_lock, save_flags); if (nodeid == NUMA_NO_NODE) nodeid = slab_node; @@ -3616,7 +3659,7 @@ __cache_alloc_node(struct kmem_cache *ca /* ___cache_alloc_node can fall back to other nodes */ ptr = ____cache_alloc_node(cachep, flags, nodeid); out: - local_irq_restore(save_flags); + local_unlock_irqrestore(slab_lock, save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, flags); @@ -3676,9 +3719,9 @@ __cache_alloc(struct kmem_cache *cachep, return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); + 
local_lock_irqsave(slab_lock, save_flags); objp = __do_cache_alloc(cachep, flags); - local_irq_restore(save_flags); + local_unlock_irqrestore(slab_lock, save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags, flags); @@ -3994,9 +4037,9 @@ void kmem_cache_free(struct kmem_cache * debug_check_no_locks_freed(objp, cachep->object_size); if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(objp, cachep->object_size); - local_irq_save(flags); + local_lock_irqsave(slab_lock, flags); __cache_free(cachep, objp, __builtin_return_address(0)); - local_irq_restore(flags); + local_unlock_irqrestore(slab_lock, flags); trace_kmem_cache_free(_RET_IP_, objp); } @@ -4025,9 +4068,9 @@ void kfree(const void *objp) debug_check_no_locks_freed(objp, c->object_size); debug_check_no_obj_freed(objp, c->object_size); - local_irq_save(flags); + local_lock_irqsave(slab_lock, flags); __cache_free(c, (void *)objp, __builtin_return_address(0)); - local_irq_restore(flags); + local_unlock_irqrestore(slab_lock, flags); } EXPORT_SYMBOL(kfree); @@ -4070,7 +4113,7 @@ static int alloc_kmemlist(struct kmem_ca if (l3) { struct array_cache *shared = l3->shared; - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); if (shared) free_block(cachep, shared->entry, @@ -4083,7 +4126,7 @@ static int alloc_kmemlist(struct kmem_ca } l3->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); kfree(shared); free_alien_cache(new_alien); continue; @@ -4130,17 +4173,28 @@ struct ccupdate_struct { struct array_cache *new[0]; }; -static void do_ccupdate_local(void *info) +static void __do_ccupdate_local(void *info, int cpu) { struct ccupdate_struct *new = info; struct array_cache *old; - check_irq_off(); - old = cpu_cache_get(new->cachep); + old = cpu_cache_get_on_cpu(new->cachep, 
cpu); + + new->cachep->array[cpu] = new->new[cpu]; + new->new[cpu] = old; +} - new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; - new->new[smp_processor_id()] = old; +#ifndef CONFIG_PREEMPT_RT_BASE +static void do_ccupdate_local(void *info) +{ + __do_ccupdate_local(info, smp_processor_id()); } +#else +static void do_ccupdate_local(void *info, int cpu) +{ + __do_ccupdate_local(info, cpu); +} +#endif /* Always called with the slab_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, @@ -4166,7 +4220,7 @@ static int do_tune_cpucache(struct kmem_ } new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)new, 1); + slab_on_each_cpu(do_ccupdate_local, (void *)new); check_irq_on(); cachep->batchcount = batchcount; @@ -4177,9 +4231,11 @@ static int do_tune_cpucache(struct kmem_ struct array_cache *ccold = new->new[i]; if (!ccold) continue; - spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + local_spin_lock_irq(slab_lock, + &cachep->nodelists[cpu_to_mem(i)]->list_lock); free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); - spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + local_spin_unlock_irq(slab_lock, + &cachep->nodelists[cpu_to_mem(i)]->list_lock); kfree(ccold); } kfree(new); @@ -4255,7 +4311,7 @@ static void drain_array(struct kmem_cach if (ac->touched && !force) { ac->touched = 0; } else { - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); if (ac->avail) { tofree = force ? 
ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) @@ -4265,7 +4321,7 @@ static void drain_array(struct kmem_cach memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); } - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); } } @@ -4404,7 +4460,7 @@ static int s_show(struct seq_file *m, vo continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); list_for_each_entry(slabp, &l3->slabs_full, list) { if (slabp->inuse != cachep->num && !error) @@ -4429,7 +4485,7 @@ static int s_show(struct seq_file *m, vo if (l3->shared) shared_avail += l3->shared->avail; - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); } num_slabs += active_slabs; num_objs = num_slabs * cachep->num; @@ -4658,13 +4714,13 @@ static int leaks_show(struct seq_file *m continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); list_for_each_entry(slabp, &l3->slabs_full, list) handle_slab(n, cachep, slabp); list_for_each_entry(slabp, &l3->slabs_partial, list) handle_slab(n, cachep, slabp); - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); } name = cachep->name; if (n[0] == n[1]) {