From: Peter Zijlstra Date: Fri Jul 3 08:44:37 2009 -0500 Subject: mm: page_alloc: Reduce lock sections further Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.30-rt20.tar.xz Split out the pages which are to be freed into a separate list and call free_pcppages_bulk() outside of the percpu page allocator locks. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- mm/page_alloc.c | 94 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 28 deletions(-) --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1085,7 +1085,7 @@ static bool bulkfree_pcp_prepare(struct #endif /* CONFIG_DEBUG_VM */ /* - * Frees a number of pages from the PCP lists + * Frees a number of pages which have been collected from the pcp lists. * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. * @@ -1096,19 +1096,58 @@ static bool bulkfree_pcp_prepare(struct * pinned" detection logic. */ static void free_pcppages_bulk(struct zone *zone, int count, - struct per_cpu_pages *pcp) + struct list_head *list) { - int migratetype = 0; - int batch_free = 0; unsigned long nr_scanned; bool isolated_pageblocks; + unsigned long flags; + + spin_lock_irqsave(&zone->lock, flags); - spin_lock(&zone->lock); isolated_pageblocks = has_isolate_pageblock(zone); nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); + while (!list_empty(list)) { + struct page *page; + int mt; /* migratetype of the to-be-freed page */ + + page = list_first_entry(list, struct page, lru); + /* must delete as __free_one_page list manipulates */ + list_del(&page->lru); + + mt = get_pcppage_migratetype(page); + /* MIGRATE_ISOLATE page should not go to pcplists */ + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); + /* Pageblock could have been isolated meanwhile */ + if (unlikely(isolated_pageblocks)) + mt = 
get_pageblock_migratetype(page); + + if (bulkfree_pcp_prepare(page)) + continue; + + __free_one_page(page, page_to_pfn(page), zone, 0, mt); + trace_mm_page_pcpu_drain(page, 0, mt); + count--; + } + WARN_ON(count != 0); + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * Moves a number of pages from the PCP lists to a private list which + * the caller frees outside of the locked region. + * + * Assumes all pages on list are in same zone, and of same order. + * count is the number of pages to move. + */ +static void isolate_pcp_pages(int count, struct per_cpu_pages *src, + struct list_head *dst) +{ + int migratetype = 0; + int batch_free = 0; + while (count) { struct page *page; struct list_head *list; @@ -1124,7 +1163,7 @@ static void free_pcppages_bulk(struct zo batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; - list = &pcp->lists[migratetype]; + list = &src->lists[migratetype]; } while (list_empty(list)); /* This is the only non-empty list. Free them all. */ @@ -1132,27 +1171,12 @@ static void free_pcppages_bulk(struct zo batch_free = count; do { - int mt; /* migratetype of the to-be-freed page */ - page = list_last_entry(list, struct page, lru); - /* must delete as __free_one_page list manipulates */ list_del(&page->lru); - mt = get_pcppage_migratetype(page); - /* MIGRATE_ISOLATE page should not go to pcplists */ - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); - /* Pageblock could have been isolated meanwhile */ - if (unlikely(isolated_pageblocks)) - mt = get_pageblock_migratetype(page); - - if (bulkfree_pcp_prepare(page)) - continue; - - __free_one_page(page, page_to_pfn(page), zone, 0, mt); - trace_mm_page_pcpu_drain(page, 0, mt); + list_add(&page->lru, dst); } while (--count && --batch_free && !list_empty(list)); } - spin_unlock(&zone->lock); } static void free_one_page(struct zone *zone, @@ -1161,7 +1185,9 @@ static void free_one_page(struct zone *z int migratetype) { unsigned long nr_scanned; - spin_lock(&zone->lock); + unsigned long flags; + 
+ spin_lock_irqsave(&zone->lock, flags); nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); @@ -1171,7 +1197,7 @@ static void free_one_page(struct zone *z migratetype = get_pfnblock_migratetype(page, pfn); } __free_one_page(page, pfn, zone, order, migratetype); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -2259,16 +2285,18 @@ static int rmqueue_bulk(struct zone *zon void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; + LIST_HEAD(dst); int to_drain, batch; local_lock_irqsave(pa_lock, flags); batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) { - free_pcppages_bulk(zone, to_drain, pcp); + isolate_pcp_pages(to_drain, pcp, &dst); pcp->count -= to_drain; } local_unlock_irqrestore(pa_lock, flags); + free_pcppages_bulk(zone, to_drain, &dst); } #endif @@ -2284,16 +2312,21 @@ static void drain_pages_zone(unsigned in unsigned long flags; struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; + LIST_HEAD(dst); + int count; cpu_lock_irqsave(cpu, flags); pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - if (pcp->count) { - free_pcppages_bulk(zone, pcp->count, pcp); + count = pcp->count; + if (count) { + isolate_pcp_pages(count, pcp, &dst); pcp->count = 0; } cpu_unlock_irqrestore(cpu, flags); + if (count) + free_pcppages_bulk(zone, count, &dst); } /* @@ -2475,8 +2508,13 @@ void free_hot_cold_page(struct page *pag pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); - free_pcppages_bulk(zone, batch, pcp); + LIST_HEAD(dst); + + isolate_pcp_pages(batch, pcp, &dst); pcp->count -= batch; + local_unlock_irqrestore(pa_lock, flags); + free_pcppages_bulk(zone, batch, &dst); + return; } out: