@@ -228,30 +228,23 @@ async fn ws_client_actor_inner(
 
     let addr = client.module.info().database_identity;
 
-    let client_identity = client.sender().id.identity;
-    let connection_id = client.sender().id.connection_id;
-
-    scopeguard::defer!(
-        if let Err(e) = WORKER_METRICS
-            .client_connection_incoming_queue_length
-            .remove_label_values(&addr, &client_identity, &connection_id) {
-            log::error!("Failed to `remove_label_values` for `client_connection_incoming_queue_length`: {e:?}");
-        };
-
-        if let Err(e) = WORKER_METRICS
-            .client_connection_outgoing_queue_length
-            .remove_label_values(&addr, &client_identity, &connection_id) {
-            log::error!("Failed to `remove_label_values` for `client_connection_outgoing_queue_length`: {e:?}");
-        }
-    );
-
-    let incoming_queue_length_metric = WORKER_METRICS
-        .client_connection_incoming_queue_length
-        .with_label_values(&addr, &client_identity, &connection_id);
-
-    let outgoing_queue_length_metric = WORKER_METRICS
-        .client_connection_outgoing_queue_length
-        .with_label_values(&addr, &client_identity, &connection_id);
+    // Grab handles on the total incoming and outgoing queue length metrics,
+    // which we'll increment and decrement as we push into and pull out of those queues.
+    // Note that `total_outgoing_queue_length` is incremented separately,
+    // by `ClientConnectionSender::send` in core/src/client/client_connection.rs;
+    // we're only responsible for decrementing that one.
+    // Also note that much care must be taken to clean up these metrics when the connection closes!
+    // Any path which exits this function must decrement each of these metrics
+    // by the number of messages still waiting in this client's queue,
+    // or else they will grow without bound as clients disconnect, and be useless.
+    let incoming_queue_length_metric = WORKER_METRICS.total_incoming_queue_length.with_label_values(&addr);
+    let outgoing_queue_length_metric = WORKER_METRICS.total_outgoing_queue_length.with_label_values(&addr);
+
+    let clean_up_metrics = |message_queue: &VecDeque<(DataMessage, Instant)>,
+                            sendrx: &mpsc::Receiver<SerializableMessage>| {
+        incoming_queue_length_metric.sub(message_queue.len() as _);
+        outgoing_queue_length_metric.sub(sendrx.len() as _);
+    };
 
     loop {
         rx_buf.clear();
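The hunk above swaps per-connection labeled gauges (previously cleaned up with `remove_label_values` inside `scopeguard::defer!`) for process-wide totals that every exit path must decrement by hand. Below is a minimal, self-contained sketch of that discipline; it uses a plain `AtomicI64` as a stand-in for the real gauge handle, and the names (`TOTAL_INCOMING_QUEUE_LENGTH`, the string messages) are illustrative only, not the actual `WORKER_METRICS` definitions.

```rust
use std::collections::VecDeque;
use std::sync::atomic::{AtomicI64, Ordering};

// Stand-in for a shared gauge handle such as
// `WORKER_METRICS.total_incoming_queue_length.with_label_values(&addr)`.
static TOTAL_INCOMING_QUEUE_LENGTH: AtomicI64 = AtomicI64::new(0);

fn main() {
    let mut message_queue: VecDeque<&str> = VecDeque::new();

    // Any path that leaves the loop must subtract however many messages are
    // still queued, or the total gauge keeps counting messages for a
    // connection that no longer exists.
    let clean_up_metrics = |message_queue: &VecDeque<&str>| {
        TOTAL_INCOMING_QUEUE_LENGTH.fetch_sub(message_queue.len() as i64, Ordering::Relaxed);
    };

    for msg in ["a", "b", "close"] {
        if msg == "close" {
            // Exit path: decrement before breaking, mirroring the
            // `None => { clean_up_metrics(..); break }` arm below.
            clean_up_metrics(&message_queue);
            break;
        }
        // Enqueue path: increment as the message is pushed.
        message_queue.push_back(msg);
        TOTAL_INCOMING_QUEUE_LENGTH.fetch_add(1, Ordering::Relaxed);
    }

    assert_eq!(TOTAL_INCOMING_QUEUE_LENGTH.load(Ordering::Relaxed), 0);
}
```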
@@ -289,7 +282,10 @@ async fn ws_client_actor_inner(
                     continue;
                 }
                 // the client sent us a close frame
-                None => break,
+                None => {
+                    clean_up_metrics(&message_queue, &sendrx);
+                    break
+                },
             },
 
             // If we have an outgoing message to send, send it off.
@@ -302,31 +298,31 @@ async fn ws_client_actor_inner(
                 // even though the websocket RFC allows it. should we fork tungstenite?
                 log::info!("dropping messages due to ws already being closed: {:?}", &rx_buf[..n]);
             } else {
-                        let send_all = async {
-                            for msg in rx_buf.drain(..n) {
-                                let workload = msg.workload();
-                                let num_rows = msg.num_rows();
-
-                                let msg = datamsg_to_wsmsg(serialize(msg, client.config));
-
-                                // These metrics should be updated together,
-                                // or not at all.
-                                if let (Some(workload), Some(num_rows)) = (workload, num_rows) {
-                                    WORKER_METRICS
-                                        .websocket_sent_num_rows
-                                        .with_label_values(&addr, &workload)
-                                        .observe(num_rows as f64);
-                                    WORKER_METRICS
-                                        .websocket_sent_msg_size
-                                        .with_label_values(&addr, &workload)
-                                        .observe(msg.len() as f64);
-                                }
-                                // feed() buffers the message, but does not necessarily send it
-                                ws.feed(msg).await?;
+                let send_all = async {
+                    for msg in rx_buf.drain(..n) {
+                        let workload = msg.workload();
+                        let num_rows = msg.num_rows();
+
+                        let msg = datamsg_to_wsmsg(serialize(msg, client.config));
+
+                        // These metrics should be updated together,
+                        // or not at all.
+                        if let (Some(workload), Some(num_rows)) = (workload, num_rows) {
+                            WORKER_METRICS
+                                .websocket_sent_num_rows
+                                .with_label_values(&addr, &workload)
+                                .observe(num_rows as f64);
+                            WORKER_METRICS
+                                .websocket_sent_msg_size
+                                .with_label_values(&addr, &workload)
+                                .observe(msg.len() as f64);
                             }
-                            // now we flush all the messages to the socket
-                            ws.flush().await
-                        };
+                        // feed() buffers the message, but does not necessarily send it
+                        ws.feed(msg).await?;
+                    }
+                    // now we flush all the messages to the socket
+                    ws.flush().await
+                };
 
                 // Flush the websocket while continuing to poll the `handle_queue`,
                 // to avoid deadlocks or delays due to enqueued futures holding resources.
                 let send_all = also_poll(send_all, make_progress(&mut current_message));
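The `feed()` / `flush()` comments refer to `futures::SinkExt`: `feed` only enqueues a frame in the sink's write buffer, and a single `flush` afterwards drives the whole batch out. A rough sketch of that batching shape follows, generic over any `Sink` and exercised with an in-memory channel in place of the tungstenite websocket; `send_batch` and the channel setup are illustrative, not code from this actor.

```rust
use futures::channel::mpsc;
use futures::executor::block_on;
use futures::sink::{Sink, SinkExt};
use futures::stream::StreamExt;

// Buffer every message with `feed`, then push them all out with one `flush`,
// the same shape as the `send_all` future in the diff above.
async fn send_batch<S, M>(ws: &mut S, batch: Vec<M>) -> Result<(), S::Error>
where
    S: Sink<M> + Unpin,
{
    for msg in batch {
        // `feed` enqueues the item; unlike `send`, it does not flush.
        ws.feed(msg).await?;
    }
    // One `flush` sends everything buffered so far.
    ws.flush().await
}

fn main() {
    block_on(async {
        // An in-memory channel stands in for the websocket sink.
        let (mut tx, mut rx) = mpsc::channel::<String>(8);
        send_batch(&mut tx, vec!["hello".into(), "world".into()]).await.unwrap();
        drop(tx);
        while let Some(msg) = rx.next().await {
            println!("sent: {msg}");
        }
    });
}
```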
@@ -375,6 +371,7 @@ async fn ws_client_actor_inner(
                 } else {
                     // the client never responded to our ping; drop them without trying to send them a Close
                     log::warn!("client {} timed out", client.id);
+                    clean_up_metrics(&message_queue, &sendrx);
                     break;
                 }
             }