From 9a4be8b680476e8bb6d8a36b49650cd93c64bac2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 16 Feb 2023 09:45:12 -0500 Subject: [PATCH] mm: page_alloc: kill highatomic The highatomic reserves are blocks set aside specifically for higher-order atomic allocations. Since watermarks are now required to be met in pageblocks, this is no longer necessary. Signed-off-by: Johannes Weiner --- include/linux/gfp.h | 2 - include/linux/mmzone.h | 6 +- mm/internal.h | 5 -- mm/page_alloc.c | 187 ++--------------------------------------- mm/vmstat.c | 1 - 5 files changed, 10 insertions(+), 191 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 65a78773dccad2..78b5176d354efa 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -19,8 +19,6 @@ static inline int gfp_migratetype(const gfp_t gfp_flags) BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE); - BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >> - GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d1083ab81998be..c705f2f7c8296a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -44,8 +44,7 @@ enum migratetype { MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, - MIGRATE_FREE, + MIGRATE_FREE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -142,7 +141,6 @@ enum zone_stat_item { NR_FREE_UNMOVABLE, NR_FREE_MOVABLE, NR_FREE_RECLAIMABLE, - NR_FREE_HIGHATOMIC, NR_FREE_FREE, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, @@ -713,8 +711,6 @@ struct zone { unsigned long _watermark[NR_WMARK]; unsigned long watermark_boost; - unsigned long nr_reserved_highatomic; - /* * We don't know if the memory that we're going to allocate will be * freeable or/and it will be released eventually, so to avoid totally diff --git a/mm/internal.h b/mm/internal.h index 5c76455f804266..24f43f5db88b7f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -778,11 +778,6 @@ extern const struct trace_print_flags pageflag_names[]; extern const struct trace_print_flags vmaflag_names[]; extern const struct trace_print_flags gfpflag_names[]; -static inline bool is_migrate_highatomic(enum migratetype migratetype) -{ - return migratetype == MIGRATE_HIGHATOMIC; -} - void setup_zone_pageset(struct zone *zone); struct migration_target_control { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9936be1e11592f..7b1241eaff888b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -379,7 +379,6 @@ const char * const migratetype_names[MIGRATE_TYPES] = { "Unmovable", "Movable", "Reclaimable", - "HighAtomic", "Free", #ifdef CONFIG_CMA "CMA", @@ -1202,7 +1201,7 @@ static inline void __free_one_page(struct page *page, * We want to prevent merge between freepages on pageblock * without fallbacks and normal pageblock. Without this, * pageblock isolation could cause incorrect freepage or CMA - * accounting or HIGHATOMIC accounting. + * accounting. */ if (migratetype != buddy_mt && (!migratetype_is_mergeable(migratetype) || @@ -2797,13 +2796,6 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, old_block_type = get_pageblock_migratetype(page); - /* - * This can happen due to races and we want to prevent broken - * highatomic accounting. - */ - if (is_migrate_highatomic(old_block_type)) - goto single_page; - /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { change_pageblock_range(page, current_order, start_type); @@ -2918,126 +2910,6 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, return -1; } -/* - * Reserve a pageblock for exclusive use of high-order atomic allocations if - * there are no empty page blocks that contain a page with a suitable order - */ -static void reserve_highatomic_pageblock(struct page *page, struct zone *zone, - unsigned int alloc_order) -{ - int mt; - unsigned long max_managed, flags; - - /* - * Limit the number reserved to 1 pageblock or roughly 1% of a zone. - * Check is race-prone but harmless. - */ - max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages; - if (zone->nr_reserved_highatomic >= max_managed) - return; - - spin_lock_irqsave(&zone->lock, flags); - - /* Recheck the nr_reserved_highatomic limit under the lock */ - if (zone->nr_reserved_highatomic >= max_managed) - goto out_unlock; - - /* Yoink! */ - mt = get_pageblock_migratetype(page); - /* Only reserve normal pageblocks (i.e., they can merge with others) */ - if (migratetype_is_mergeable(mt)) { - zone->nr_reserved_highatomic += pageblock_nr_pages; - set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); - move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC, NULL); - } - -out_unlock: - spin_unlock_irqrestore(&zone->lock, flags); -} - -/* - * Used when an allocation is about to fail under memory pressure. This - * potentially hurts the reliability of high-order allocations when under - * intense memory pressure but failed atomic allocations should be easier - * to recover from than an OOM. - * - * If @force is true, try to unreserve a pageblock even though highatomic - * pageblock is exhausted. - */ -static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, - bool force) -{ - struct zonelist *zonelist = ac->zonelist; - unsigned long flags; - struct zoneref *z; - struct zone *zone; - struct page *page; - int order; - bool ret; - - for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, - ac->nodemask) { - /* - * Preserve at least one pageblock unless memory pressure - * is really high. - */ - if (!force && zone->nr_reserved_highatomic <= - pageblock_nr_pages) - continue; - - spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order < MAX_ORDER; order++) { - struct free_area *area = &(zone->free_area[order]); - int mt; - - page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); - if (!page) - continue; - - mt = get_pageblock_migratetype(page); - /* - * In page freeing path, migratetype change is racy so - * we can counter several free pages in a pageblock - * in this loop although we changed the pageblock type - * from highatomic to ac->migratetype. So we should - * adjust the count once. - */ - if (is_migrate_highatomic(mt)) { - /* - * It should never happen but changes to - * locking could inadvertently allow a per-cpu - * drain to add pages to MIGRATE_HIGHATOMIC - * while unreserving so be safe and watch for - * underflows. - */ - zone->nr_reserved_highatomic -= min( - pageblock_nr_pages, - zone->nr_reserved_highatomic); - } - - /* - * Convert to ac->migratetype and avoid the normal - * pageblock stealing heuristics. Minimally, the caller - * is doing the work and needs the pages. More - * importantly, if the block was always converted to - * MIGRATE_UNMOVABLE or another type then the number - * of pageblocks that cannot be completely freed - * may increase. - */ - set_pageblock_migratetype(page, ac->migratetype); - ret = move_freepages_block(zone, page, mt, - ac->migratetype, NULL); - if (ret) { - spin_unlock_irqrestore(&zone->lock, flags); - return ret; - } - } - spin_unlock_irqrestore(&zone->lock, flags); - } - - return false; -} - /* * Try finding a free buddy page on the fallback list and put it on the free * list of requested migratetype, possibly along with other pages from the same @@ -3510,18 +3382,11 @@ void free_unref_page(struct page *page, unsigned int order) /* * We only track unmovable, reclaimable and movable on pcp lists. - * Place ISOLATE pages on the isolated list because they are being - * offlined but treat HIGHATOMIC as movable pages so we can get those - * areas back if necessary. Otherwise, we may have to free - * excessively into the page allocator */ migratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { - if (unlikely(is_migrate_isolate(migratetype) || migratetype == MIGRATE_FREE)) { - free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); - return; - } - migratetype = MIGRATE_MOVABLE; + free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); + return; } zone = page_zone(page); @@ -3740,24 +3605,11 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, unsigned long flags; do { - page = NULL; spin_lock_irqsave(&zone->lock, flags); - /* - * order-0 request can reach here when the pcplist is skipped - * due to non-CMA allocation context. HIGHATOMIC area is - * reserved for high-order atomic allocation, so order-0 - * request should skip it. - */ - if (order > 0 && alloc_flags & ALLOC_HARDER) - page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); - if (!page) { - page = __rmqueue(zone, order, migratetype, alloc_flags); - if (!page) { - spin_unlock_irqrestore(&zone->lock, flags); - return NULL; - } - } + page = __rmqueue(zone, order, migratetype, alloc_flags); spin_unlock_irqrestore(&zone->lock, flags); + if (!page) + return NULL; } while (check_new_pages(page, order)); __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); @@ -4003,8 +3855,6 @@ static long __zone_free_pages(struct zone *zone, int alloc_flags, bool safe) * neutral blocks have been consumed, let's keep it simple. */ free_pages = page_state(zone, NR_FREE_FREE, safe); - if (alloc_flags & (ALLOC_HARDER | ALLOC_OOM)) - free_pages += page_state(zone, NR_FREE_HIGHATOMIC, safe); if (IS_ENABLED(CONFIG_CMA) && (alloc_flags & ALLOC_CMA)) free_pages += page_state(zone, NR_FREE_CMA_PAGES, safe); @@ -4098,8 +3948,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif - if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC)) - return true; } return false; } @@ -4340,14 +4188,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, gfp_mask, alloc_flags, ac->migratetype); if (page) { prep_new_page(page, order, gfp_mask, alloc_flags); - - /* - * If this is a high-order atomic allocation then check - * if the pageblock should be reserved for the future - */ - if (unlikely(order && (alloc_flags & ALLOC_HARDER))) - reserve_highatomic_pageblock(page, zone, order); - return page; } else { #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT @@ -4855,7 +4695,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, * Shrink them and try again */ if (!page && !drained) { - unreserve_highatomic_pageblock(ac, false); drain_all_pages(NULL); drained = true; goto retry; @@ -5006,10 +4845,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, * Make sure we converge to OOM if we cannot make any progress * several times in the row. */ - if (*no_progress_loops > MAX_RECLAIM_RETRIES) { - /* Before OOM, exhaust highatomic_reserve */ - return unreserve_highatomic_pageblock(ac, true); - } + if (*no_progress_loops > MAX_RECLAIM_RETRIES) + return false; /* * Keep reclaiming pages while there is a chance this will lead @@ -6121,7 +5958,6 @@ static void show_migration_types(unsigned char type) [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_MOVABLE] = 'M', [MIGRATE_RECLAIMABLE] = 'E', - [MIGRATE_HIGHATOMIC] = 'H', [MIGRATE_FREE] = 'F', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', @@ -6186,7 +6022,7 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i " sec_pagetables:%lu bounce:%lu\n" " kernel_misc_reclaimable:%lu\n" " free:%lu free_unmovable:%lu free_movable:%lu\n" - " free_reclaimable:%lu free_highatomic:%lu free_free:%lu\n" + " free_reclaimable:%lu free_free:%lu\n" " free_cma:%lu free_pcp:%lu\n", global_node_page_state(NR_ACTIVE_ANON), global_node_page_state(NR_INACTIVE_ANON), @@ -6209,7 +6045,6 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i global_zone_page_state(NR_FREE_UNMOVABLE), global_zone_page_state(NR_FREE_MOVABLE), global_zone_page_state(NR_FREE_RECLAIMABLE), - global_zone_page_state(NR_FREE_HIGHATOMIC), global_zone_page_state(NR_FREE_FREE), global_zone_page_state(NR_FREE_CMA_PAGES), free_pcp); @@ -6293,13 +6128,11 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i " free_unmovable:%lukB" " free_movable:%lukB" " free_reclaimable:%lukB" - " free_highatomic:%lukB" " free_free:%lukB" " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" - " reserved_highatomic:%luKB" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" @@ -6319,13 +6152,11 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i K(zone_page_state(zone, NR_FREE_UNMOVABLE)), K(zone_page_state(zone, NR_FREE_MOVABLE)), K(zone_page_state(zone, NR_FREE_RECLAIMABLE)), - K(zone_page_state(zone, NR_FREE_HIGHATOMIC)), K(zone_page_state(zone, NR_FREE_FREE)), K(zone->watermark_boost), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), - K(zone->nr_reserved_highatomic), K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)), diff --git a/mm/vmstat.c b/mm/vmstat.c index c8b8e6e259da87..a2f7b41564dfbd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1171,7 +1171,6 @@ const char * const vmstat_text[] = { "nr_free_unmovable", "nr_free_movable", "nr_free_reclaimable", - "nr_free_highatomic", "nr_free_free", "nr_zone_inactive_anon", "nr_zone_active_anon",