From e7404bdc7a924a84fe61c0bcbe7ecefcf0287555 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 3 Jul 2009 08:44:43 -0500
Subject: [092/254] mm: More lock breaks in slab.c

Handle __free_pages outside of the locked regions. This reduces the
lock contention on the percpu slab locks in -rt significantly.

Signed-off-by: Peter Zijlstra
Signed-off-by: Thomas Gleixner
---
 mm/slab.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 22 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 341748b..15bce6b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -727,6 +727,7 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
 #endif
 
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
 static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
 
 #ifndef CONFIG_PREEMPT_RT_BASE
@@ -742,14 +743,39 @@ slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
 {
         unsigned int i;
 
-        for_each_online_cpu(i) {
-                spin_lock_irq(&per_cpu(slab_lock, i).lock);
+        for_each_online_cpu(i)
                 func(arg, i);
-                spin_unlock_irq(&per_cpu(slab_lock, i).lock);
-        }
 }
 #endif
 
+static void free_delayed(struct list_head *h)
+{
+        while(!list_empty(h)) {
+                struct page *page = list_first_entry(h, struct page, lru);
+
+                list_del(&page->lru);
+                __free_pages(page, page->index);
+        }
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+        LIST_HEAD(tmp);
+
+        list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+        local_spin_unlock_irq(slab_lock, list_lock);
+        free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+        LIST_HEAD(tmp);
+
+        list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+        local_unlock_irqrestore(slab_lock, flags);
+        free_delayed(&tmp);
+}
+
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
         return cachep->array[smp_processor_id()];
 }
@@ -1245,7 +1271,7 @@ static void __cpuinit cpuup_canceled(long cpu)
                         free_block(cachep, nc->entry, nc->avail, node);
 
                 if (!cpumask_empty(mask)) {
-                        local_spin_unlock_irq(slab_lock, &l3->list_lock);
+                        unlock_l3_and_free_delayed(&l3->list_lock);
                         goto free_array_cache;
                 }
 
@@ -1259,7 +1285,7 @@ static void __cpuinit cpuup_canceled(long cpu)
                 alien = l3->alien;
                 l3->alien = NULL;
 
-                local_spin_unlock_irq(slab_lock, &l3->list_lock);
+                unlock_l3_and_free_delayed(&l3->list_lock);
 
                 kfree(shared);
                 if (alien) {
@@ -1540,6 +1566,8 @@ void __init kmem_cache_init(void)
                 use_alien_caches = 0;
 
         local_irq_lock_init(slab_lock);
+        for_each_possible_cpu(i)
+                INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
 
         for (i = 0; i < NUM_INIT_LISTS; i++) {
                 kmem_list3_init(&initkmem_list3[i]);
@@ -1868,12 +1896,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 /*
  * Interface to system's page release.
  */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
 {
         unsigned long i = (1 << cachep->gfporder);
-        struct page *page = virt_to_page(addr);
+        struct page *page, *basepage = virt_to_page(addr);
         const unsigned long nr_freed = i;
 
+        page = basepage;
+
         kmemcheck_free_shadow(page, cachep->gfporder);
 
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1889,7 +1919,13 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
         }
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += nr_freed;
-        free_pages((unsigned long)addr, cachep->gfporder);
+
+        if (!delayed) {
+                free_pages((unsigned long)addr, cachep->gfporder);
+        } else {
+                basepage->index = cachep->gfporder;
+                list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+        }
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -1897,7 +1933,7 @@ static void kmem_rcu_free(struct rcu_head *head)
         struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
         struct kmem_cache *cachep = slab_rcu->cachep;
 
-        kmem_freepages(cachep, slab_rcu->addr);
+        kmem_freepages(cachep, slab_rcu->addr, false);
         if (OFF_SLAB(cachep))
                 kmem_cache_free(cachep->slabp_cache, slab_rcu);
 }
@@ -2116,7 +2152,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
  * Before calling the slab must have been unlinked from the cache. The
  * cache-lock is not held/needed.
  */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+                         bool delayed)
 {
         void *addr = slabp->s_mem - slabp->colouroff;
 
@@ -2129,7 +2166,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
                 slab_rcu->addr = addr;
                 call_rcu(&slab_rcu->head, kmem_rcu_free);
         } else {
-                kmem_freepages(cachep, addr);
+                kmem_freepages(cachep, addr, delayed);
                 if (OFF_SLAB(cachep))
                         kmem_cache_free(cachep->slabp_cache, slabp);
         }
@@ -2651,9 +2688,15 @@ static void do_drain(void *arg)
         __do_drain(arg, smp_processor_id());
 }
 #else
-static void do_drain(void *arg, int this_cpu)
+static void do_drain(void *arg, int cpu)
 {
-        __do_drain(arg, this_cpu);
+        LIST_HEAD(tmp);
+
+        spin_lock_irq(&per_cpu(slab_lock, cpu).lock);
+        __do_drain(arg, cpu);
+        list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+        spin_unlock_irq(&per_cpu(slab_lock, cpu).lock);
+        free_delayed(&tmp);
 }
 #endif
 
@@ -2711,7 +2754,7 @@ static int drain_freelist(struct kmem_cache *cache,
                  */
                 l3->free_objects -= cache->num;
                 local_spin_unlock_irq(slab_lock, &l3->list_lock);
-                slab_destroy(cache, slabp);
+                slab_destroy(cache, slabp, false);
                 nr_freed++;
         }
 out:
@@ -3046,7 +3089,7 @@ static int cache_grow(struct kmem_cache *cachep,
         spin_unlock(&l3->list_lock);
         return 1;
 opps1:
-        kmem_freepages(cachep, objp);
+        kmem_freepages(cachep, objp, false);
 failed:
         if (local_flags & __GFP_WAIT)
                 local_lock_irq(slab_lock);
@@ -3700,7 +3743,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                                  * a different cache, refer to comments before
                                  * alloc_slabmgmt.
                                  */
-                                slab_destroy(cachep, slabp);
+                                slab_destroy(cachep, slabp, true);
                         } else {
                                 list_add(&slabp->list, &l3->slabs_free);
                         }
@@ -3967,7 +4010,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
         debug_check_no_obj_freed(objp, obj_size(cachep));
         local_lock_irqsave(slab_lock, flags);
         __cache_free(cachep, objp, __builtin_return_address(0));
-        local_unlock_irqrestore(slab_lock, flags);
+        unlock_slab_and_free_delayed(flags);
 
         trace_kmem_cache_free(_RET_IP_, objp);
 }
@@ -3997,7 +4040,7 @@ void kfree(const void *objp)
         debug_check_no_obj_freed(objp, obj_size(c));
         local_lock_irqsave(slab_lock, flags);
         __cache_free(c, (void *)objp, __builtin_return_address(0));
-        local_unlock_irqrestore(slab_lock, flags);
+        unlock_slab_and_free_delayed(flags);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -4053,7 +4096,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
                         }
                         l3->free_limit = (1 + nr_cpus_node(node)) *
                                         cachep->batchcount + cachep->num;
-                        local_spin_unlock_irq(slab_lock, &l3->list_lock);
+                        unlock_l3_and_free_delayed(&l3->list_lock);
+
                         kfree(shared);
                         free_alien_cache(new_alien);
                         continue;
@@ -4119,7 +4163,9 @@ static void do_ccupdate_local(void *info)
 #else
 static void do_ccupdate_local(void *info, int cpu)
 {
+        spin_lock_irq(&per_cpu(slab_lock, cpu).lock);
         __do_ccupdate_local(info, cpu);
+        spin_unlock_irq(&per_cpu(slab_lock, cpu).lock);
 }
 #endif
 
@@ -4161,8 +4207,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                 local_spin_lock_irq(slab_lock,
                                     &cachep->nodelists[cpu_to_mem(i)]->list_lock);
                 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-                local_spin_unlock_irq(slab_lock,
-                                      &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+
+                unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
                 kfree(ccold);
         }
         kfree(new);
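
For readers who want the pattern in isolation: the patch never calls
__free_pages() while slab_lock or an l3->list_lock is held.
kmem_freepages() only parks the pages on the per-CPU slab_free_list,
and the unlock_*_and_free_delayed() helpers splice that list onto a
stack-local one, drop the lock, and only then hand the pages back to
the page allocator. Below is a minimal userspace sketch of that shape,
with a pthread mutex standing in for the local lock; the names
(pending_page, deferred_list, free_page_delayed, unlock_and_free_delayed)
are invented for illustration and are not kernel APIs.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pending_page {
        void *mem;                      /* stands in for a struct page */
        struct pending_page *next;
};

static pthread_mutex_t slab_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pending_page *deferred_list;      /* protected by slab_lock */

/* Called with slab_lock held: queue the page instead of freeing it. */
static void free_page_delayed(void *mem)
{
        struct pending_page *p = malloc(sizeof(*p));

        if (!p)
                abort();
        p->mem = mem;
        p->next = deferred_list;
        deferred_list = p;
}

/* Splice the pending list out, drop the lock, then do the slow frees. */
static void unlock_and_free_delayed(void)
{
        struct pending_page *list = deferred_list;

        deferred_list = NULL;
        pthread_mutex_unlock(&slab_lock);

        while (list) {
                struct pending_page *p = list;

                list = p->next;
                free(p->mem);           /* the __free_pages() analogue */
                free(p);
        }
}

int main(void)
{
        pthread_mutex_lock(&slab_lock);
        free_page_delayed(malloc(4096));        /* pretend slab pages */
        free_page_delayed(malloc(4096));
        unlock_and_free_delayed();
        printf("pages freed outside the lock\n");
        return 0;
}

Splicing onto a local list before unlocking is what keeps the scheme
safe: once the shared per-CPU list is empty again, another context can
take the lock and queue its own pages without racing with the frees
done here.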