@@ -100,7 +100,7 @@ int plan_x_cpdom_migration(void)
100100 return 0 ;
101101 }
102102 if ((stealee_threshold <= max_sc_load || overflow_running ) &&
103- (stealer_threshold < min_sc_load )) {
103+ (stealer_threshold < min_sc_load )) {
104104 /*
105105 * If there is an overloaded domain, always try to steal.
106106 * <~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~>
@@ -123,7 +123,7 @@ int plan_x_cpdom_migration(void)
123123 * Under-loaded active domains become a stealer.
124124 */
125125 if (cpdomc -> nr_active_cpus &&
126- cpdomc -> sc_load <= stealer_threshold ) {
126+ cpdomc -> sc_load <= stealer_threshold ) {
127127 WRITE_ONCE (cpdomc -> is_stealer , true);
128128 WRITE_ONCE (cpdomc -> is_stealee , false);
129129 continue ;
@@ -133,7 +133,7 @@ int plan_x_cpdom_migration(void)
133133 * Over-loaded or non-active domains become a stealee.
134134 */
135135 if (!cpdomc -> nr_active_cpus ||
136- cpdomc -> sc_load >= stealee_threshold ) {
136+ cpdomc -> sc_load >= stealee_threshold ) {
137137 WRITE_ONCE (cpdomc -> is_stealer , false);
138138 WRITE_ONCE (cpdomc -> is_stealee , true);
139139 nr_stealee ++ ;
@@ -173,6 +173,48 @@ static bool consume_dsq(struct cpdom_ctx *cpdomc, u64 dsq_id)
173173 return ret ;
174174}
175175
176+ /*
177+ * Attempts to peek the vtime of the task at the head of the DSQ, or returns U64_MAX if the DSQ is empty.
178+ */
179+ static int peek_vtime (u64 dsq_id )
180+ {
181+ struct task_struct * task ;
182+ task = __COMPAT_scx_bpf_dsq_peek (dsq_id );
183+ return task ? task -> scx .dsq_vtime : U64_MAX ;
184+ }
185+
186+ /*
187+ * Racy operation that returns the cpu that which appears to have the lowest vtime at its head.
188+ */
189+ static int pick_cpu_with_lowest_vtime (struct cpdom_ctx * cpdomc )
190+ {
191+ u64 lowest_vtime = U64_MAX ;
192+ int pick_cpu = - ENOENT ;
193+ int cpu , i , j ;
194+
195+ if (!per_cpu_dsq )
196+ return - ENOENT ;
197+
198+ bpf_for (i , 0 , LAVD_CPU_ID_MAX /64 ) {
199+ u64 cpumask = cpdomc -> __cpumask [i ];
200+ bpf_for (j , 0 , 64 ) {
201+ if (cpumask & 0x1LLU << j ) {
202+ u64 vtime ;
203+ cpu = (i * 64 ) + j ;
204+ if (cpu >= __nr_cpu_ids )
205+ break ;
206+ vtime = peek_vtime (cpu_to_dsq (cpu ));
207+ if (vtime < lowest_vtime ) {
208+ lowest_vtime = vtime ;
209+ pick_cpu = cpu ;
210+ }
211+ }
212+ }
213+ }
214+
215+ return pick_cpu ;
216+ }
217+
176218/*
177219 * For simplicity, try to just steal from the CPU with
178220 * the highest number of queued_tasks in this domain.
@@ -256,7 +298,7 @@ static bool try_to_steal_task(struct cpdom_ctx *cpdomc)
256298 if (!READ_ONCE (cpdomc_pick -> is_stealee ) || !cpdomc_pick -> is_valid )
257299 continue ;
258300
259- pick_cpu = pick_most_loaded_cpu (cpdomc_pick );
301+ pick_cpu = pick_cpu_with_lowest_vtime (cpdomc_pick );
260302 if (pick_cpu >= 0 )
261303 dsq_id = cpu_to_dsq (pick_cpu );
262304 else
@@ -333,7 +375,7 @@ static bool force_to_steal_task(struct cpdom_ctx *cpdomc)
333375 if (!cpdomc_pick -> is_valid )
334376 continue ;
335377
336- pick_cpu = pick_most_loaded_cpu (cpdomc_pick );
378+ pick_cpu = pick_cpu_with_lowest_vtime (cpdomc_pick );
337379 if (pick_cpu >= 0 )
338380 dsq_id = cpu_to_dsq (pick_cpu );
339381 else
@@ -350,7 +392,6 @@ static bool force_to_steal_task(struct cpdom_ctx *cpdomc)
350392static bool consume_task (u64 cpu_dsq_id , u64 cpdom_dsq_id )
351393{
352394 struct cpdom_ctx * cpdomc ;
353- struct task_struct * p ;
354395 u64 vtime = U64_MAX , dsq_id = cpu_dsq_id ;
355396
356397 cpdomc = MEMBER_VPTR (cpdom_ctxs , [dsq_to_cpdom (cpdom_dsq_id )]);
@@ -364,20 +405,13 @@ static bool consume_task(u64 cpu_dsq_id, u64 cpdom_dsq_id)
364405 * a task from any of stealee domains probabilistically.
365406 */
366407 if (nr_cpdoms > 1 && READ_ONCE (cpdomc -> is_stealer ) &&
367- try_to_steal_task (cpdomc ))
408+ try_to_steal_task (cpdomc ))
368409 goto x_domain_migration_out ;
369410
370411 if (per_cpu_dsq ) {
371- bpf_for_each (scx_dsq , p , cpu_dsq_id , 0 ) {
372- vtime = p -> scx .dsq_vtime ;
373- break ;
374- }
375-
376- bpf_for_each (scx_dsq , p , cpdom_dsq_id , 0 ) {
377- if (p -> scx .dsq_vtime < vtime )
378- dsq_id = cpdom_dsq_id ;
379- break ;
380- }
412+ vtime = peek_vtime (cpu_dsq_id );
413+ if (peek_vtime (cpdom_dsq_id ) < vtime )
414+ dsq_id = cpdom_dsq_id ;
381415 } else {
382416 dsq_id = cpdom_dsq_id ;
383417 }
0 commit comments