Skip to content

Commit dc7f07c

Browse files
joelagnelchantra
authored and committed
rcu/nocb: Fix possible bugs in rcu_barrier()
When going through the lazy-rcu work, I noticed that rcu_barrier_entrain() does not really wake up the rcuog GP thread in any path after entraining. This means it is possible that the GP thread is not awakened soon (say there were no CBs in the cblist after entraining time). Further, nothing appears to call the rcu_barrier callback directly in the case where the ->cblist is empty, which means that if the IPI gets delayed enough for the ->cblist to become empty, and this turns out to be the last CPU holding up the barrier, then nothing completes rcu_state.barrier_completion. Fix both these issues. A note on the wakeup: there are 3 cases AFAICS after the call to rcu_nocb_flush_bypass(): 1. The rdp->cblist has pending CBs. 2. The rdp->cblist has all done CBs. 3. The rdp->cblist has no CBs at all (say the IPI took a long time to arrive and some other path dequeued them in the meanwhile). For #3, entraining a CB is not needed and we should bail. For #1, a wakeup is not needed. But for #2 it is needed. Signed-off-by: Joel Fernandes (Google) <[email protected]>
1 parent da314e0 commit dc7f07c

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

kernel/rcu/tree.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3910,10 +3910,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
39103910
/*
39113911
* If needed, entrain an rcu_barrier() callback on rdp->cblist.
39123912
*/
3913-
static void rcu_barrier_entrain(struct rcu_data *rdp)
3913+
static void rcu_barrier_entrain(struct rcu_data *rdp, unsigned long flags)
39143914
{
39153915
unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
39163916
unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
3917+
bool was_alldone;
39173918

39183919
lockdep_assert_held(&rcu_state.barrier_lock);
39193920
if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@ -3922,14 +3923,20 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
39223923
rdp->barrier_head.func = rcu_barrier_callback;
39233924
debug_rcu_head_queue(&rdp->barrier_head);
39243925
rcu_nocb_lock(rdp);
3926+
was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
39253927
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
3928+
39263929
if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
39273930
atomic_inc(&rcu_state.barrier_cpu_count);
3931+
__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
39283932
} else {
3933+
/* rdp->cblist is empty so directly call the callback. */
3934+
atomic_inc(&rcu_state.barrier_cpu_count);
3935+
rcu_barrier_callback(&rdp->barrier_head);
39293936
debug_rcu_head_unqueue(&rdp->barrier_head);
39303937
rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
3938+
rcu_nocb_unlock(rdp);
39313939
}
3932-
rcu_nocb_unlock(rdp);
39333940
smp_store_release(&rdp->barrier_seq_snap, gseq);
39343941
}
39353942

@@ -3938,15 +3945,16 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
39383945
*/
39393946
static void rcu_barrier_handler(void *cpu_in)
39403947
{
3948+
unsigned long flags;
39413949
uintptr_t cpu = (uintptr_t)cpu_in;
39423950
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
39433951

39443952
lockdep_assert_irqs_disabled();
39453953
WARN_ON_ONCE(cpu != rdp->cpu);
39463954
WARN_ON_ONCE(cpu != smp_processor_id());
3947-
raw_spin_lock(&rcu_state.barrier_lock);
3948-
rcu_barrier_entrain(rdp);
3949-
raw_spin_unlock(&rcu_state.barrier_lock);
3955+
raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
3956+
rcu_barrier_entrain(rdp, flags);
3957+
raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
39503958
}
39513959

39523960
/**
@@ -4013,7 +4021,7 @@ void rcu_barrier(void)
40134021
continue;
40144022
}
40154023
if (!rcu_rdp_cpu_online(rdp)) {
4016-
rcu_barrier_entrain(rdp);
4024+
rcu_barrier_entrain(rdp, flags);
40174025
WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
40184026
raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
40194027
rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
@@ -4339,7 +4347,7 @@ void rcutree_migrate_callbacks(int cpu)
43394347

43404348
raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
43414349
WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
4342-
rcu_barrier_entrain(rdp);
4350+
rcu_barrier_entrain(rdp, flags);
43434351
my_rdp = this_cpu_ptr(&rcu_data);
43444352
my_rnp = my_rdp->mynode;
43454353
rcu_nocb_lock(my_rdp); /* irqs already disabled. */

0 commit comments

Comments
 (0)