diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 47b9ebd4a74f..0e427a9997f3 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -16,6 +16,7 @@
 #define IRQ_WORK_BUSY		2UL
 #define IRQ_WORK_FLAGS		3UL
 #define IRQ_WORK_LAZY		4UL /* Doesn't want IPI, wait for tick */
+#define IRQ_WORK_HARD_IRQ	8UL /* Run hard IRQ context, even on RT */
 
 struct irq_work {
 	unsigned long flags;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index bcf107ce0854..0ddaf1e66d8c 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -17,6 +17,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/interrupt.h>
 #include <asm/processor.h>
 
 
@@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void)
  */
 bool irq_work_queue_on(struct irq_work *work, int cpu)
 {
+	struct llist_head *list;
+
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(cpu));
 
@@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
 	if (!irq_work_claim(work))
 		return false;
 
-	if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
+		list = &per_cpu(lazy_list, cpu);
+	else
+		list = &per_cpu(raised_list, cpu);
+
+	if (llist_add(&work->llnode, list))
 		arch_send_call_function_single_ipi(cpu);
 
 	return true;
@@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on);
 /* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
+	struct llist_head *list;
+	bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
+
 	/* Only queue if not already pending */
 	if (!irq_work_claim(work))
 		return false;
@@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work)
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	/* If the work is "lazy", handle it from next tick if any */
-	if (work->flags & IRQ_WORK_LAZY) {
-		if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
-		    tick_nohz_tick_stopped())
-			arch_irq_work_raise();
-	} else {
-		if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
+	lazy_work = work->flags & IRQ_WORK_LAZY;
+
+	if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
+		list = this_cpu_ptr(&lazy_list);
+	else
+		list = this_cpu_ptr(&raised_list);
+
+	if (llist_add(&work->llnode, list)) {
+		if (!lazy_work || tick_nohz_tick_stopped())
 			arch_irq_work_raise();
 	}
 
@@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void)
 	raised = this_cpu_ptr(&raised_list);
 	lazy = this_cpu_ptr(&lazy_list);
 
-	if (llist_empty(raised) || arch_irq_work_has_interrupt())
-		if (llist_empty(lazy))
-			return false;
+	if (llist_empty(raised) && llist_empty(lazy))
+		return false;
 
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
@@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list)
 	struct irq_work *work;
 	struct llist_node *llnode;
 
-	BUG_ON(!irqs_disabled());
+	BUG_ON_NONRT(!irqs_disabled());
 
 	if (llist_empty(list))
 		return;
@@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list)
 void irq_work_run(void)
 {
 	irq_work_run_list(this_cpu_ptr(&raised_list));
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
+		/*
+		 * NOTE: we raise softirq via IPI for safety,
+		 * and execute in irq_work_tick() to move the
+		 * overhead from hard to soft irq context.
+		 */
+		if (!llist_empty(this_cpu_ptr(&lazy_list)))
+			raise_softirq(TIMER_SOFTIRQ);
+	} else
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d5fcfa3a96e..5f66d62bca5b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6046,6 +6046,7 @@ static int init_rootdomain(struct root_domain *rd)
 	rd->rto_cpu = -1;
 	raw_spin_lock_init(&rd->rto_lock);
 	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+	rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ;
 #endif
 
 	init_dl_bw(&rd->dl_bw);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 586c3bcc3e01..87fd56b88f01 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -220,6 +220,7 @@ static void nohz_full_kick_func(struct irq_work *work)
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 	.func = nohz_full_kick_func,
+	.flags = IRQ_WORK_HARD_IRQ,
 };
 
 /*
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a416cee45801..e345117292fe 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1661,7 +1661,7 @@ void update_process_times(int user_tick)
 	scheduler_tick();
 	run_local_timers();
 	rcu_check_callbacks(user_tick);
-#ifdef CONFIG_IRQ_WORK
+#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL)
 	if (in_irq())
 		irq_work_tick();
 #endif
@@ -1717,6 +1717,10 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
+	irq_work_tick();
+#endif
+
 	__run_timers(base);
 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
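A minimal caller-side sketch (illustration only, not part of the patch) of what the new flag means in practice: on CONFIG_PREEMPT_RT_FULL, irq_work items land on lazy_list and run from irq_work_tick() in the TIMER_SOFTIRQ, unless they set IRQ_WORK_HARD_IRQ, in which case they stay on raised_list and their callback still runs from hard interrupt context, exactly as the rto_push_work and nohz_full_kick_work hunks above do. The identifiers sample_hard_func and sample_hard_work are hypothetical:

	#include <linux/irq_work.h>

	/* Runs from hard IRQ context even on RT because of the flag set
	 * below; must therefore be short and must not sleep or take
	 * sleeping locks. */
	static void sample_hard_func(struct irq_work *work)
	{
		/* latency-critical follow-up work goes here */
	}

	static struct irq_work sample_hard_work = {
		.func	= sample_hard_func,
		.flags	= IRQ_WORK_HARD_IRQ,	/* opt out of softirq deferral on RT */
	};

	/* Safe to call from NMI, hard-IRQ, or other atomic context: */
	irq_work_queue(&sample_hard_work);

Without the flag, the same item would be deferred to the timer softirq on RT kernels, trading latency for shorter hard-IRQ sections.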