From aabe8e5e45d5fa7796210237733b0b1169310321 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Fri, 3 Nov 2023 16:31:20 +0800 Subject: [PATCH] fix get tso hang Signed-off-by: Ping Yu --- src/pd/retry.rs | 52 +++++++++++++++++++++++++++++---------------- src/pd/timestamp.rs | 4 ++-- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/pd/retry.rs b/src/pd/retry.rs index 0e65e13b..2dd54487 100644 --- a/src/pd/retry.rs +++ b/src/pd/retry.rs @@ -70,18 +70,12 @@ impl RetryClient { } } -macro_rules! retry { - ($self: ident, $tag: literal, |$cluster: ident| $call: expr) => {{ +macro_rules! retry_core { + ($self: ident, $tag: literal, $call: expr) => {{ let stats = pd_stats($tag); let mut last_err = Ok(()); for _ in 0..LEADER_CHANGE_RETRY { - // use the block here to drop the guard of the read lock, - // otherwise `reconnect` will try to acquire the write lock and results in a deadlock - let res = { - let $cluster = &mut $self.cluster.write().await.0; - let res = $call.await; - res - }; + let res = $call; match stats.done(res) { Ok(r) => return Ok(r), @@ -103,6 +97,28 @@ macro_rules! retry { }}; } +macro_rules! retry_mut { + ($self: ident, $tag: literal, |$cluster: ident| $call: expr) => {{ + retry_core!($self, $tag, { + // use the block here to drop the guard of the lock, + // otherwise `reconnect` will try to acquire the write lock and results in a deadlock + let $cluster = &mut $self.cluster.write().await.0; + $call.await + }) + }}; +} + +macro_rules! retry { + ($self: ident, $tag: literal, |$cluster: ident| $call: expr) => {{ + retry_core!($self, $tag, { + // use the block here to drop the guard of the lock, + // otherwise `reconnect` will try to acquire the write lock and results in a deadlock + let $cluster = &$self.cluster.read().await.0; + $call.await + }) + }}; +} + impl RetryClient { pub async fn connect( endpoints: &[String], @@ -127,7 +143,7 @@ impl RetryClientTrait for RetryClient { // These get_* functions will try multiple times to make a request, reconnecting as necessary. // It does not know about encoding. Caller should take care of it. async fn get_region(self: Arc, key: Vec) -> Result { - retry!(self, "get_region", |cluster| { + retry_mut!(self, "get_region", |cluster| { let key = key.clone(); async { cluster @@ -141,7 +157,7 @@ impl RetryClientTrait for RetryClient { } async fn get_region_by_id(self: Arc, region_id: RegionId) -> Result { - retry!(self, "get_region_by_id", |cluster| async { + retry_mut!(self, "get_region_by_id", |cluster| async { cluster .get_region_by_id(region_id, self.timeout) .await @@ -152,7 +168,7 @@ impl RetryClientTrait for RetryClient { } async fn get_store(self: Arc, id: StoreId) -> Result { - retry!(self, "get_store", |cluster| async { + retry_mut!(self, "get_store", |cluster| async { cluster .get_store(id, self.timeout) .await @@ -161,7 +177,7 @@ impl RetryClientTrait for RetryClient { } async fn get_all_stores(self: Arc) -> Result> { - retry!(self, "get_all_stores", |cluster| async { + retry_mut!(self, "get_all_stores", |cluster| async { cluster .get_all_stores(self.timeout) .await @@ -174,7 +190,7 @@ impl RetryClientTrait for RetryClient { } async fn update_safepoint(self: Arc, safepoint: u64) -> Result { - retry!(self, "update_gc_safepoint", |cluster| async { + retry_mut!(self, "update_gc_safepoint", |cluster| async { cluster .update_safepoint(safepoint, self.timeout) .await @@ -257,11 +273,11 @@ mod test { } async fn retry_err(client: Arc) -> Result<()> { - retry!(client, "test", |_c| ready(Err(internal_err!("whoops")))) + retry_mut!(client, "test", |_c| ready(Err(internal_err!("whoops")))) } async fn retry_ok(client: Arc) -> Result<()> { - retry!(client, "test", |_c| ready(Ok::<_, Error>(()))) + retry_mut!(client, "test", |_c| ready(Ok::<_, Error>(()))) } executor::block_on(async { @@ -310,7 +326,7 @@ mod test { client: Arc, max_retries: Arc, ) -> Result<()> { - retry!(client, "test", |c| { + retry_mut!(client, "test", |c| { c.fetch_add(1, std::sync::atomic::Ordering::SeqCst); let max_retries = max_retries.fetch_sub(1, Ordering::SeqCst) - 1; @@ -326,7 +342,7 @@ mod test { client: Arc, max_retries: Arc, ) -> Result<()> { - retry!(client, "test", |c| { + retry_mut!(client, "test", |c| { c.fetch_add(1, std::sync::atomic::Ordering::SeqCst); let max_retries = max_retries.fetch_sub(1, Ordering::SeqCst) - 1; diff --git a/src/pd/timestamp.rs b/src/pd/timestamp.rs index 672b587b..ac99e052 100644 --- a/src/pd/timestamp.rs +++ b/src/pd/timestamp.rs @@ -148,8 +148,8 @@ impl Stream for TsoRequestStream { Poll::Ready(Some(sender)) => { requests.push(sender); } - Poll::Ready(None) => return Poll::Ready(None), - Poll::Pending => break, + Poll::Ready(None) if requests.is_empty() => return Poll::Ready(None), + _ => break, } }