 * This software may be used and distributed according to the terms of the
 * GNU General Public License version 2.
 *
- * This header adds L3 cache awareness to scx_mitosis by defining BPF
- * maps for CPU-to-L3 domain mappings. It provides functions to
+ * This header helps add L3 cache awareness to scx_mitosis by defining
+ * maps and functions for managing CPU-to-L3 domain mappings. It provides code to
 * recalculate per-L3 CPU counts within cells and implements weighted
 * random L3 selection for tasks. It also tracks work-stealing
 * statistics for cross-L3 task migrations.
#include "mitosis.bpf.h"
#include "intf.h"

- // It's also an option to just compute this from the cpu_to_l3 map.
- struct l3_cpu_mask {
- 	unsigned long cpumask[CPUMASK_LONG_ENTRIES];
- };
+ typedef u32 l3_id_t;
+ #define L3_INVALID ((l3_id_t)~0u)
+
+ // Configure how aggressively we steal work.
+ // When a task is detected as a steal candidate, skip it this many times.
+ // On a web-server workload, a value of 100 reduced the steal count by ~90%.
+ #ifdef MITOSIS_ENABLE_STEALING
+ #define PREVENT_N_STEALS 10
+ #endif
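+ // With PREVENT_N_STEALS == 10, a queued task is passed over ten times by
+ // would-be thieves before it becomes eligible to be stolen (see the
+ // steals_prevented check in try_stealing_work()).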

/* Work stealing statistics map - accessible from both BPF and userspace */
struct steal_stats_map {
@@ -38,27 +43,46 @@ struct cpu_to_l3_map {
struct l3_to_cpus_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
- 	__type(value, struct l3_cpu_mask);
+ 	__type(value, struct cpumask);
	__uint(max_entries, MAX_L3S);
};

- extern struct cpu_to_l3_map cpu_to_l3 SEC(".maps");
- extern struct l3_to_cpus_map l3_to_cpus SEC(".maps");
- extern struct steal_stats_map steal_stats SEC(".maps");
+ extern struct cpu_to_l3_map cpu_to_l3;
+ extern struct l3_to_cpus_map l3_to_cpus;
+ extern struct steal_stats_map steal_stats;
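+ // Declarations only; the map definitions (with SEC(".maps")) live elsewhere.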
+
+ static inline bool l3_is_valid(u32 l3_id)
+ {
+ 	if (l3_id == L3_INVALID)
+ 		return false;
+
+ 	/* l3_id is unsigned, so only the upper bound needs checking */
+ 	return l3_id < MAX_L3S;
+ }
+
+ static inline void init_task_l3(struct task_ctx *tctx)
+ {
+ 	tctx->l3 = L3_INVALID;
+
+ #ifdef MITOSIS_ENABLE_STEALING
+ 	tctx->pending_l3 = L3_INVALID;
+ 	tctx->steal_count = 0;
+ 	tctx->last_stolen_at = 0;
+ 	tctx->steals_prevented = 0;
+ #endif
+ }
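+ // A task's L3 stays L3_INVALID until one is assigned (e.g. via
+ // pick_l3_for_task()).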

static inline const struct cpumask *lookup_l3_cpumask(u32 l3)
{
- 	struct l3_cpu_mask *mask;
+ 	struct cpumask *mask;

	if (!(mask = bpf_map_lookup_elem(&l3_to_cpus, &l3))) {
		scx_bpf_error("no l3 cpumask, l3: %d, %p", l3, &l3_to_cpus);
		return NULL;
	}

- 	return (const struct cpumask *)mask;
+ 	return mask;
}

- /* Recompute cell->l3_cpu_cnt[] after cell cpumask changes (no persistent kptrs). */
+ /* Recompute cell->l3_cpu_cnt[] after cell cpumask changes */
static __always_inline void recalc_cell_l3_counts(u32 cell_idx)
{
	struct cell *cell = lookup_cell(cell_idx);
@@ -89,7 +113,6 @@ static __always_inline void recalc_cell_l3_counts(u32 cell_idx)
			continue;
		}

- 		/* ok: dst is bpf_cpumask*, sources are (RCU cpumask*, plain cpumask*) */
		bpf_cpumask_and(tmp, cell_mask, l3_mask);

		u32 cnt = bpf_cpumask_weight((const struct cpumask *)tmp);
@@ -113,24 +136,24 @@ static __always_inline void recalc_cell_l3_counts(u32 cell_idx)
 * have higher probability of being selected.
 *
 * @cell_id: The cell ID to select an L3 from
- * @return: L3 ID on success, INVALID_L3_ID on error, or 0 as fallback
+ * @return: L3 ID on success, L3_INVALID on error
 */
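+ /*
+  * Example: if a cell's CPUs are split 12/4 across two L3s, the larger L3 is
+  * chosen with probability 12/16 and the smaller with probability 4/16.
+  */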
static inline s32 pick_l3_for_task(u32 cell_id)
{
	struct cell *cell;
	u32 l3, target, cur = 0;
- 	s32 ret = INVALID_L3_ID;
+ 	s32 ret = L3_INVALID;

	/* Look up the cell structure */
	if (!(cell = lookup_cell(cell_id)))
- 		return INVALID_L3_ID;
+ 		return L3_INVALID;

	/* Handle case where cell has no CPUs assigned yet */
	if (!cell->cpu_cnt) {
		scx_bpf_error(
			"pick_l3_for_task: cell %d has no CPUs accounted yet",
			cell_id);
- 		return INVALID_L3_ID;
+ 		return L3_INVALID;
	}

	/* Generate random target value in range [0, cpu_cnt) */
@@ -148,3 +171,88 @@ static inline s32 pick_l3_for_task(u32 cell_id)
	}
	return ret;
}
+
+ #ifdef MITOSIS_ENABLE_STEALING
+
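+ /*
+  * Try to move one task from candidate_dsq onto this CPU's local DSQ and, on
+  * success, update the stolen task's bookkeeping. Returns true only if a task
+  * was actually moved.
+  */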
+ static inline bool try_stealing_this_task(struct task_ctx *task_ctx,
+ 					  s32 local_l3, u64 candidate_dsq)
+ {
+ 	// Attempt the steal; it can fail because we race with other CPUs.
+ 	if (!scx_bpf_dsq_move_to_local(candidate_dsq))
+ 		return false;
+
+ 	// We got the task!
+ 	task_ctx->steal_count++;
+ 	task_ctx->last_stolen_at = scx_bpf_now();
+ 	/* Retag to the thief's L3 (the one for this CPU) */
+ 	task_ctx->pending_l3 = local_l3;
+ 	task_ctx->steals_prevented = 0;
+
+ 	/* Increment steal counter in map */
+ 	u32 key = 0;
+ 	u64 *count = bpf_map_lookup_elem(&steal_stats, &key);
+ 	// NOTE: A shared atomic counter could get expensive, but we don't expect
+ 	// many steals. Switch to a per-CPU map if it ever matters.
+ 	if (count)
+ 		__sync_fetch_and_add(count, 1);
+
+ 	return true;
+ }
+
+ /* Work stealing:
+  * Scan sibling (cell, L3) DSQs in the same cell and steal the first queued
+  * task if it can run on this CPU.
+  */
+ static inline bool try_stealing_work(u32 cell, s32 local_l3)
+ {
+ 	if (!l3_is_valid(local_l3)) {
+ 		scx_bpf_error("try_stealing_work: invalid local_l3");
+ 		return false;
+ 	}
+
+ 	struct cell *cell_ptr = lookup_cell(cell);
+ 	if (!cell_ptr) {
+ 		scx_bpf_error("try_stealing_work: invalid cell");
+ 		return false;
+ 	}
+
+ 	// Loop over all L3s, looking for a queued task to steal
+ 	u32 candidate_l3;
+ 	bpf_for(candidate_l3, 0, nr_l3)
+ 	{
+ 		if (candidate_l3 == (u32)local_l3)
+ 			continue;
+
+ 		// Skip L3s that are not present in this cell
+ 		// Note: recheck cell_ptr to keep the verifier happy
+ 		if (cell_ptr && cell_ptr->l3_cpu_cnt[candidate_l3] == 0)
+ 			continue;
+
+ 		u64 candidate_dsq = get_cell_l3_dsq_id(cell, candidate_l3);
+
+ 		struct task_struct *task = NULL;
+ 		struct task_ctx *task_ctx;
+ 		// found_task exists mainly to satisfy the verifier
+ 		bool found_task = false;
+
+ 		// Trick: iterate only to peek at the head element of the DSQ
+ 		bpf_for_each(scx_dsq, task, candidate_dsq, 0)
+ 		{
+ 			task_ctx = lookup_task_ctx(task);
+ 			found_task = (task_ctx != NULL);
+ 			break;
+ 		}
+
+ 		// No task? Try the next L3
+ 		if (!found_task)
+ 			continue;
+
+ 		// This knob throttles stealing.
+ 		// TODO: make it runtime configurable
+ 		if (task_ctx->steals_prevented++ < PREVENT_N_STEALS) {
+ 			continue;
+ 		}
+
+ 		if (!try_stealing_this_task(task_ctx, local_l3, candidate_dsq))
+ 			continue;
+
+ 		// Success, we got a task (though not necessarily the one we peeked at)
+ 		return true;
+ 	}
+ 	return false;
+ }
+ #endif