mm: page_alloc: kill highatomic
The highatomic reserves are blocks set aside specifically for
higher-order atomic allocations. Since watermarks are now required to
be met in pageblocks, this is no longer necessary.

Signed-off-by: Johannes Weiner <[email protected]>
hnaz committed Mar 9, 2023 · 1 parent 61be94a · commit 9a4be8b
Showing 5 changed files with 10 additions and 191 deletions.
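Most of the deleted code is the reservation machinery itself. Its sizing rule, visible in the removed reserve_highatomic_pageblock() below, capped the reserve at roughly 1% of a zone's managed pages plus one pageblock, checked before each reservation. The following is a minimal userspace sketch of just that arithmetic; the pageblock size, zone size, and the zone_sim/may_reserve_highatomic names are illustrative assumptions, not code from the patch.

/*
 * Minimal userspace sketch (not kernel code) of the reservation cap in the
 * removed reserve_highatomic_pageblock(). The pageblock size and zone size
 * below are assumed example values.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* e.g. 2MB pageblocks of 4kB pages */

struct zone_sim {
	unsigned long managed_pages;		/* pages backing the zone */
	unsigned long nr_reserved_highatomic;	/* pages reserved so far */
};

/* Mirrors the "1 pageblock or roughly 1% of a zone" limit from the diff. */
static bool may_reserve_highatomic(const struct zone_sim *zone)
{
	unsigned long max_managed;

	max_managed = (zone->managed_pages / 100) + PAGEBLOCK_NR_PAGES;
	return zone->nr_reserved_highatomic < max_managed;
}

int main(void)
{
	struct zone_sim zone = { .managed_pages = 262144 };	/* ~1GB of 4kB pages */

	/* Reserve one pageblock at a time until the cap trips, the way the
	 * allocator grew the reserve on successful high-order atomic
	 * allocations. */
	while (may_reserve_highatomic(&zone))
		zone.nr_reserved_highatomic += PAGEBLOCK_NR_PAGES;

	/* Prints 3584 pages / 7 pageblocks: the limit is checked before each
	 * reservation, so the reserve can overshoot the nominal 1% by up to
	 * one pageblock. */
	printf("reserved %lu pages in %lu pageblocks\n",
	       zone.nr_reserved_highatomic,
	       zone.nr_reserved_highatomic / PAGEBLOCK_NR_PAGES);
	return 0;
}

With watermarks now enforced within pageblocks, as the commit message notes, this dedicated carve-out and its accounting become redundant, which is what the diff below removes.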
2 changes: 0 additions & 2 deletions include/linux/gfp.h
@@ -19,8 +19,6 @@ static inline int gfp_migratetype(const gfp_t gfp_flags)
BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE);
BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >>
GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC);

if (unlikely(page_group_by_mobility_disabled))
return MIGRATE_UNMOVABLE;
6 changes: 1 addition & 5 deletions include/linux/mmzone.h
@@ -44,8 +44,7 @@ enum migratetype {
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
MIGRATE_FREE,
MIGRATE_FREE = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
/*
* MIGRATE_CMA migration type is designed to mimic the way
@@ -142,7 +141,6 @@ enum zone_stat_item {
NR_FREE_UNMOVABLE,
NR_FREE_MOVABLE,
NR_FREE_RECLAIMABLE,
NR_FREE_HIGHATOMIC,
NR_FREE_FREE,
NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
@@ -713,8 +711,6 @@ struct zone {
unsigned long _watermark[NR_WMARK];
unsigned long watermark_boost;

unsigned long nr_reserved_highatomic;

/*
* We don't know if the memory that we're going to allocate will be
* freeable or/and it will be released eventually, so to avoid totally
5 changes: 0 additions & 5 deletions mm/internal.h
@@ -778,11 +778,6 @@ extern const struct trace_print_flags pageflag_names[];
extern const struct trace_print_flags vmaflag_names[];
extern const struct trace_print_flags gfpflag_names[];

static inline bool is_migrate_highatomic(enum migratetype migratetype)
{
return migratetype == MIGRATE_HIGHATOMIC;
}

void setup_zone_pageset(struct zone *zone);

struct migration_target_control {
187 changes: 9 additions & 178 deletions mm/page_alloc.c
@@ -379,7 +379,6 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
"Movable",
"Reclaimable",
"HighAtomic",
"Free",
#ifdef CONFIG_CMA
"CMA",
@@ -1202,7 +1201,7 @@ static inline void __free_one_page(struct page *page,
* We want to prevent merge between freepages on pageblock
* without fallbacks and normal pageblock. Without this,
* pageblock isolation could cause incorrect freepage or CMA
* accounting or HIGHATOMIC accounting.
* accounting.
*/
if (migratetype != buddy_mt
&& (!migratetype_is_mergeable(migratetype) ||
@@ -2797,13 +2796,6 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,

old_block_type = get_pageblock_migratetype(page);

/*
* This can happen due to races and we want to prevent broken
* highatomic accounting.
*/
if (is_migrate_highatomic(old_block_type))
goto single_page;

/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
change_pageblock_range(page, current_order, start_type);
@@ -2918,126 +2910,6 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
return -1;
}

/*
* Reserve a pageblock for exclusive use of high-order atomic allocations if
* there are no empty page blocks that contain a page with a suitable order
*/
static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
unsigned int alloc_order)
{
int mt;
unsigned long max_managed, flags;

/*
* Limit the number reserved to 1 pageblock or roughly 1% of a zone.
* Check is race-prone but harmless.
*/
max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
if (zone->nr_reserved_highatomic >= max_managed)
return;

spin_lock_irqsave(&zone->lock, flags);

/* Recheck the nr_reserved_highatomic limit under the lock */
if (zone->nr_reserved_highatomic >= max_managed)
goto out_unlock;

/* Yoink! */
mt = get_pageblock_migratetype(page);
/* Only reserve normal pageblocks (i.e., they can merge with others) */
if (migratetype_is_mergeable(mt)) {
zone->nr_reserved_highatomic += pageblock_nr_pages;
set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC, NULL);
}

out_unlock:
spin_unlock_irqrestore(&zone->lock, flags);
}

/*
* Used when an allocation is about to fail under memory pressure. This
* potentially hurts the reliability of high-order allocations when under
* intense memory pressure but failed atomic allocations should be easier
* to recover from than an OOM.
*
* If @force is true, try to unreserve a pageblock even though highatomic
* pageblock is exhausted.
*/
static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
bool force)
{
struct zonelist *zonelist = ac->zonelist;
unsigned long flags;
struct zoneref *z;
struct zone *zone;
struct page *page;
int order;
bool ret;

for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) {
/*
* Preserve at least one pageblock unless memory pressure
* is really high.
*/
if (!force && zone->nr_reserved_highatomic <=
pageblock_nr_pages)
continue;

spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
struct free_area *area = &(zone->free_area[order]);
int mt;

page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
if (!page)
continue;

mt = get_pageblock_migratetype(page);
/*
* In page freeing path, migratetype change is racy so
* we can counter several free pages in a pageblock
* in this loop although we changed the pageblock type
* from highatomic to ac->migratetype. So we should
* adjust the count once.
*/
if (is_migrate_highatomic(mt)) {
/*
* It should never happen but changes to
* locking could inadvertently allow a per-cpu
* drain to add pages to MIGRATE_HIGHATOMIC
* while unreserving so be safe and watch for
* underflows.
*/
zone->nr_reserved_highatomic -= min(
pageblock_nr_pages,
zone->nr_reserved_highatomic);
}

/*
* Convert to ac->migratetype and avoid the normal
* pageblock stealing heuristics. Minimally, the caller
* is doing the work and needs the pages. More
* importantly, if the block was always converted to
* MIGRATE_UNMOVABLE or another type then the number
* of pageblocks that cannot be completely freed
* may increase.
*/
set_pageblock_migratetype(page, ac->migratetype);
ret = move_freepages_block(zone, page, mt,
ac->migratetype, NULL);
if (ret) {
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
}
}
spin_unlock_irqrestore(&zone->lock, flags);
}

return false;
}

/*
* Try finding a free buddy page on the fallback list and put it on the free
* list of requested migratetype, possibly along with other pages from the same
@@ -3510,18 +3382,11 @@ void free_unref_page(struct page *page, unsigned int order)

/*
* We only track unmovable, reclaimable and movable on pcp lists.
* Place ISOLATE pages on the isolated list because they are being
* offlined but treat HIGHATOMIC as movable pages so we can get those
* areas back if necessary. Otherwise, we may have to free
* excessively into the page allocator
*/
migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
if (unlikely(is_migrate_isolate(migratetype) || migratetype == MIGRATE_FREE)) {
free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
return;
}
migratetype = MIGRATE_MOVABLE;
free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
return;
}

zone = page_zone(page);
@@ -3740,24 +3605,11 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
unsigned long flags;

do {
page = NULL;
spin_lock_irqsave(&zone->lock, flags);
/*
* order-0 request can reach here when the pcplist is skipped
* due to non-CMA allocation context. HIGHATOMIC area is
* reserved for high-order atomic allocation, so order-0
* request should skip it.
*/
if (order > 0 && alloc_flags & ALLOC_HARDER)
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) {
page = __rmqueue(zone, order, migratetype, alloc_flags);
if (!page) {
spin_unlock_irqrestore(&zone->lock, flags);
return NULL;
}
}
page = __rmqueue(zone, order, migratetype, alloc_flags);
spin_unlock_irqrestore(&zone->lock, flags);
if (!page)
return NULL;
} while (check_new_pages(page, order));

__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -4003,8 +3855,6 @@ static long __zone_free_pages(struct zone *zone, int alloc_flags, bool safe)
* neutral blocks have been consumed, let's keep it simple.
*/
free_pages = page_state(zone, NR_FREE_FREE, safe);
if (alloc_flags & (ALLOC_HARDER | ALLOC_OOM))
free_pages += page_state(zone, NR_FREE_HIGHATOMIC, safe);
if (IS_ENABLED(CONFIG_CMA) && (alloc_flags & ALLOC_CMA))
free_pages += page_state(zone, NR_FREE_CMA_PAGES, safe);

@@ -4098,8 +3948,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
return true;
}
#endif
if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
return true;
}
return false;
}
@@ -4340,14 +4188,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);

/*
* If this is a high-order atomic allocation then check
* if the pageblock should be reserved for the future
*/
if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
reserve_highatomic_pageblock(page, zone, order);

return page;
} else {
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -4855,7 +4695,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
* Shrink them and try again
*/
if (!page && !drained) {
unreserve_highatomic_pageblock(ac, false);
drain_all_pages(NULL);
drained = true;
goto retry;
@@ -5006,10 +4845,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Make sure we converge to OOM if we cannot make any progress
* several times in the row.
*/
if (*no_progress_loops > MAX_RECLAIM_RETRIES) {
/* Before OOM, exhaust highatomic_reserve */
return unreserve_highatomic_pageblock(ac, true);
}
if (*no_progress_loops > MAX_RECLAIM_RETRIES)
return false;

/*
* Keep reclaiming pages while there is a chance this will lead
@@ -6121,7 +5958,6 @@ static void show_migration_types(unsigned char type)
[MIGRATE_UNMOVABLE] = 'U',
[MIGRATE_MOVABLE] = 'M',
[MIGRATE_RECLAIMABLE] = 'E',
[MIGRATE_HIGHATOMIC] = 'H',
[MIGRATE_FREE] = 'F',
#ifdef CONFIG_CMA
[MIGRATE_CMA] = 'C',
@@ -6186,7 +6022,7 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
" sec_pagetables:%lu bounce:%lu\n"
" kernel_misc_reclaimable:%lu\n"
" free:%lu free_unmovable:%lu free_movable:%lu\n"
" free_reclaimable:%lu free_highatomic:%lu free_free:%lu\n"
" free_reclaimable:%lu free_free:%lu\n"
" free_cma:%lu free_pcp:%lu\n",
global_node_page_state(NR_ACTIVE_ANON),
global_node_page_state(NR_INACTIVE_ANON),
@@ -6209,7 +6045,6 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
global_zone_page_state(NR_FREE_UNMOVABLE),
global_zone_page_state(NR_FREE_MOVABLE),
global_zone_page_state(NR_FREE_RECLAIMABLE),
global_zone_page_state(NR_FREE_HIGHATOMIC),
global_zone_page_state(NR_FREE_FREE),
global_zone_page_state(NR_FREE_CMA_PAGES),
free_pcp);
@@ -6293,13 +6128,11 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
" free_unmovable:%lukB"
" free_movable:%lukB"
" free_reclaimable:%lukB"
" free_highatomic:%lukB"
" free_free:%lukB"
" boost:%lukB"
" min:%lukB"
" low:%lukB"
" high:%lukB"
" reserved_highatomic:%luKB"
" active_anon:%lukB"
" inactive_anon:%lukB"
" active_file:%lukB"
@@ -6319,13 +6152,11 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
K(zone_page_state(zone, NR_FREE_UNMOVABLE)),
K(zone_page_state(zone, NR_FREE_MOVABLE)),
K(zone_page_state(zone, NR_FREE_RECLAIMABLE)),
K(zone_page_state(zone, NR_FREE_HIGHATOMIC)),
K(zone_page_state(zone, NR_FREE_FREE)),
K(zone->watermark_boost),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
K(zone->nr_reserved_highatomic),
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
1 change: 0 additions & 1 deletion mm/vmstat.c
@@ -1171,7 +1171,6 @@ const char * const vmstat_text[] = {
"nr_free_unmovable",
"nr_free_movable",
"nr_free_reclaimable",
"nr_free_highatomic",
"nr_free_free",
"nr_zone_inactive_anon",
"nr_zone_active_anon",
