From 3b42fc65ee1228fbc32d80d2d928419fe6172e8b Mon Sep 17 00:00:00 2001
From: JmPotato <ghzpotato@gmail.com>
Date: Wed, 29 May 2024 11:27:38 +0800
Subject: [PATCH 1/4] Handle 503 HTTP code to schedule member change check

Signed-off-by: JmPotato <ghzpotato@gmail.com>
---
 client/http/client.go       | 48 ++++++++++++++++++++++++-------------
 client/http/request_info.go | 11 +++++++++
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/client/http/client.go b/client/http/client.go
index 30144ebe2c5..35d184e95db 100644
--- a/client/http/client.go
+++ b/client/http/client.go
@@ -120,10 +120,23 @@ func (ci *clientInner) requestWithRetry(
 	headerOpts ...HeaderOption,
 ) error {
 	var (
+		serverURL  string
+		isLeader   bool
 		statusCode int
 		err        error
+		logFields  = append(reqInfo.logFields(),
+			zap.String("source", ci.source),
+			zap.String("server-url", serverURL),
+			zap.Bool("is-leader", isLeader),
+			zap.Int("status-code", statusCode),
+			zap.Error(err))
 	)
 	execFunc := func() error {
+		defer func() {
+			// Handle some special status codes to increase the success rate of the following requests.
+			ci.handleHTTPStatusCode(statusCode)
+			log.Debug("[pd] http request finished", logFields...)
+		}()
 		// It will try to send the request to the PD leader first and then try to send the request to the other PD followers.
 		clients := ci.sd.GetAllServiceClients()
 		if len(clients) == 0 {
@@ -131,17 +144,17 @@ func (ci *clientInner) requestWithRetry(
 		}
 		skipNum := 0
 		for _, cli := range clients {
-			url := cli.GetURL()
-			if reqInfo.targetURL != "" && reqInfo.targetURL != url {
+			serverURL = cli.GetURL()
+			isLeader = cli.IsConnectedToLeader()
+			if len(reqInfo.targetURL) > 0 && reqInfo.targetURL != serverURL {
 				skipNum++
 				continue
 			}
-			statusCode, err = ci.doRequest(ctx, url, reqInfo, headerOpts...)
+			statusCode, err = ci.doRequest(ctx, serverURL, reqInfo, headerOpts...)
 			if err == nil || noNeedRetry(statusCode) {
 				return err
 			}
-			log.Debug("[pd] request url failed",
-				zap.String("source", ci.source), zap.Bool("is-leader", cli.IsConnectedToLeader()), zap.String("url", url), zap.Error(err))
+			log.Debug("[pd] http request url failed", logFields...)
 		}
 		if skipNum == len(clients) {
 			return errs.ErrClientNoTargetMember
@@ -153,13 +166,21 @@ func (ci *clientInner) requestWithRetry(
 	}
 	// Copy a new backoffer for each request.
 	bo := *reqInfo.bo
-	// Backoffer also needs to check the status code to determine whether to retry.
+	// Set the retryable checker for the backoffer.
 	bo.SetRetryableChecker(func(err error) bool {
+		// Backoffer also needs to check the status code to determine whether to retry.
 		return err != nil && !noNeedRetry(statusCode)
 	})
 	return bo.Exec(ctx, execFunc)
 }
 
+func (ci *clientInner) handleHTTPStatusCode(code int) {
+	// If the status code is 503, it indicates that there may be PD leader/follower changes.
+	if code == http.StatusServiceUnavailable {
+		ci.sd.ScheduleCheckMemberChanged()
+	}
+}
+
 func noNeedRetry(statusCode int) bool {
 	return statusCode == http.StatusNotFound ||
 		statusCode == http.StatusForbidden ||
@@ -168,26 +189,21 @@ func noNeedRetry(statusCode int) bool {
 
 func (ci *clientInner) doRequest(
 	ctx context.Context,
-	url string, reqInfo *requestInfo,
+	serverURL string, reqInfo *requestInfo,
 	headerOpts ...HeaderOption,
 ) (int, error) {
 	var (
-		source      = ci.source
 		callerID    = reqInfo.callerID
 		name        = reqInfo.name
 		method      = reqInfo.method
 		body        = reqInfo.body
 		res         = reqInfo.res
 		respHandler = reqInfo.respHandler
+		url         = reqInfo.getURL(serverURL)
+		logFields   = append(reqInfo.logFields(),
+			zap.String("source", ci.source),
+			zap.String("url", url))
 	)
-	url = reqInfo.getURL(url)
-	logFields := []zap.Field{
-		zap.String("source", source),
-		zap.String("name", name),
-		zap.String("url", url),
-		zap.String("method", method),
-		zap.String("caller-id", callerID),
-	}
 	log.Debug("[pd] request the http url", logFields...)
 	req, err := http.NewRequestWithContext(ctx, method, url, bytes.NewBuffer(body))
 	if err != nil {
diff --git a/client/http/request_info.go b/client/http/request_info.go
index 202eab1150f..b95c8ad7fa4 100644
--- a/client/http/request_info.go
+++ b/client/http/request_info.go
@@ -18,6 +18,7 @@ import (
 	"fmt"
 
 	"github.com/tikv/pd/client/retry"
+	"go.uber.org/zap"
 )
 
 // The following constants are the names of the requests.
@@ -157,3 +158,13 @@ func (ri *requestInfo) WithTargetURL(targetURL string) *requestInfo {
 func (ri *requestInfo) getURL(addr string) string {
 	return fmt.Sprintf("%s%s", addr, ri.uri)
 }
+
+func (ri *requestInfo) logFields() []zap.Field {
+	return []zap.Field{
+		zap.String("callerID", ri.callerID),
+		zap.String("name", ri.name),
+		zap.String("uri", ri.uri),
+		zap.String("method", ri.method),
+		zap.String("targetURL", ri.targetURL),
+	}
+}

From d952e06f1ecefd5e6d5f6aa6c650aeca3f7d26e0 Mon Sep 17 00:00:00 2001
From: JmPotato <ghzpotato@gmail.com>
Date: Thu, 30 May 2024 11:21:27 +0800
Subject: [PATCH 2/4] Refine the retryable checker

Signed-off-by: JmPotato <ghzpotato@gmail.com>
---
 client/http/client.go        |  4 ++--
 client/retry/backoff.go      | 24 ++++++++++++++----------
 client/retry/backoff_test.go | 26 ++++++++++++++++++++++----
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/client/http/client.go b/client/http/client.go
index 35d184e95db..0bf68d88df9 100644
--- a/client/http/client.go
+++ b/client/http/client.go
@@ -166,11 +166,11 @@ func (ci *clientInner) requestWithRetry(
 	}
 	// Copy a new backoffer for each request.
 	bo := *reqInfo.bo
-	// Set the retryable checker for the backoffer.
+	// Set the retryable checker for the backoffer if it's not set.
 	bo.SetRetryableChecker(func(err error) bool {
 		// Backoffer also needs to check the status code to determine whether to retry.
 		return err != nil && !noNeedRetry(statusCode)
-	})
+	}, false)
 	return bo.Exec(ctx, execFunc)
 }
 
diff --git a/client/retry/backoff.go b/client/retry/backoff.go
index 580e466badb..6c72b68ab9d 100644
--- a/client/retry/backoff.go
+++ b/client/retry/backoff.go
@@ -50,7 +50,7 @@ type Backoffer struct {
 	// total defines the max total time duration cost in retrying. If it's 0, it means infinite retry until success.
 	total time.Duration
 	// retryableChecker is used to check if the error is retryable.
-	// By default, all errors are retryable.
+	// If it's not set, it will use `defaultRetryableChecker` to retry on all non-nil errors.
 	retryableChecker func(err error) bool
 	// logInterval defines the log interval for retrying.
 	logInterval time.Duration
@@ -132,12 +132,9 @@ func InitialBackoffer(base, max, total time.Duration, opts ...Option) *Backoffer
 		total = base
 	}
 	bo := &Backoffer{
-		base:  base,
-		max:   max,
-		total: total,
-		retryableChecker: func(err error) bool {
-			return err != nil
-		},
+		base:         base,
+		max:          max,
+		total:        total,
 		next:         base,
 		currentTotal: 0,
 		attempt:      0,
@@ -148,18 +145,25 @@ func InitialBackoffer(base, max, total time.Duration, opts ...Option) *Backoffer
 	return bo
 }
 
-// SetRetryableChecker sets the retryable checker.
-func (bo *Backoffer) SetRetryableChecker(checker func(err error) bool) {
+// SetRetryableChecker sets the retryable checker. The `overwrite` flag indicates whether to replace an existing checker.
+func (bo *Backoffer) SetRetryableChecker(checker func(err error) bool, overwrite bool) {
+	if !overwrite && bo.retryableChecker != nil {
+		return
+	}
 	bo.retryableChecker = checker
 }
 
 func (bo *Backoffer) isRetryable(err error) bool {
 	if bo.retryableChecker == nil {
-		return true
+		return defaultRetryableChecker(err)
 	}
 	return bo.retryableChecker(err)
 }
 
+func defaultRetryableChecker(err error) bool {
+	return err != nil
+}
+
 // nextInterval for now use the `exponentialInterval`.
 func (bo *Backoffer) nextInterval() time.Duration {
 	return bo.exponentialInterval()
diff --git a/client/retry/backoff_test.go b/client/retry/backoff_test.go
index 8df06b75f94..8dd44033b55 100644
--- a/client/retry/backoff_test.go
+++ b/client/retry/backoff_test.go
@@ -95,16 +95,34 @@ func TestBackoffer(t *testing.T) {
 	// Test the retryable checker.
 	execCount = 0
 	bo = InitialBackoffer(base, max, total)
-	bo.SetRetryableChecker(func(error) bool {
+	retryableChecker := func(error) bool {
 		return execCount < 2
-	})
-	err = bo.Exec(ctx, func() error {
+	}
+	bo.SetRetryableChecker(retryableChecker, false)
+	execFunc := func() error {
 		execCount++
 		return nil
-	})
+	}
+	err = bo.Exec(ctx, execFunc)
+	re.NoError(err)
+	re.Equal(2, execCount)
+	re.True(isBackofferReset(bo))
+	// Test the retryable checker with overwrite.
+	execCount = 0
+	retryableChecker = func(error) bool {
+		return execCount < 4
+	}
+	bo.SetRetryableChecker(retryableChecker, false)
+	err = bo.Exec(ctx, execFunc)
 	re.NoError(err)
 	re.Equal(2, execCount)
 	re.True(isBackofferReset(bo))
+	execCount = 0
+	bo.SetRetryableChecker(retryableChecker, true)
+	err = bo.Exec(ctx, execFunc)
+	re.NoError(err)
+	re.Equal(4, execCount)
+	re.True(isBackofferReset(bo))
 }
 
 func isBackofferReset(bo *Backoffer) bool {

From 75da7c0bf9febe67f486381afa3aa5add68aaa1f Mon Sep 17 00:00:00 2001
From: JmPotato <ghzpotato@gmail.com>
Date: Thu, 30 May 2024 11:57:01 +0800
Subject: [PATCH 3/4] Handle HTTP errors for leader/primary change

Signed-off-by: JmPotato <ghzpotato@gmail.com>
---
 client/client.go                                 | 11 -----------
 client/errs/errno.go                             |  4 ++++
 client/errs/errs.go                              | 14 ++++++++++++++
 client/http/client.go                            | 11 ++++++-----
 client/tso_dispatcher.go                         |  2 +-
 errors.toml                                      |  5 +++++
 pkg/errs/errno.go                                |  8 ++++----
 pkg/utils/apiutil/multiservicesapi/middleware.go |  4 ++--
 pkg/utils/apiutil/serverapi/middleware.go        |  2 +-
 server/apiv2/middlewares/redirector.go           |  4 ++--
 tests/integrations/client/client_test.go         |  3 ++-
 11 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/client/client.go b/client/client.go
index 1865fd0866e..1c8ef3cafe8 100644
--- a/client/client.go
+++ b/client/client.go
@@ -1431,17 +1431,6 @@ func (c *client) scatterRegionsWithOptions(ctx context.Context, regionsID []uint
 	return resp, nil
 }
 
-// IsLeaderChange will determine whether there is a leader change.
-func IsLeaderChange(err error) bool {
-	if err == errs.ErrClientTSOStreamClosed {
-		return true
-	}
-	errMsg := err.Error()
-	return strings.Contains(errMsg, errs.NotLeaderErr) ||
-		strings.Contains(errMsg, errs.MismatchLeaderErr) ||
-		strings.Contains(errMsg, errs.NotServedErr)
-}
-
 const (
 	httpSchemePrefix  = "http://"
 	httpsSchemePrefix = "https://"
diff --git a/client/errs/errno.go b/client/errs/errno.go
index 50c136dd5f2..f181cddb286 100644
--- a/client/errs/errno.go
+++ b/client/errs/errno.go
@@ -35,6 +35,10 @@ const (
 	NotServedErr = "is not served"
 	// RetryTimeoutErr indicates the server is busy.
 	RetryTimeoutErr = "retry timeout"
+	// NotPrimaryErr indicates the non-primary member received the requests which should be received by primary.
+	// Note: keep the same as the ones defined on the server side, because the client side checks if an error message
+	// contains this string to judge whether the primary is changed.
+	NotPrimaryErr = "is not primary"
 )
 
 // client errors
diff --git a/client/errs/errs.go b/client/errs/errs.go
index 47f7c29a467..ee29e4c4349 100644
--- a/client/errs/errs.go
+++ b/client/errs/errs.go
@@ -15,11 +15,25 @@
 package errs
 
 import (
+	"strings"
+
 	"github.com/pingcap/errors"
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"
 )
 
+// IsLeaderChange will determine whether there is a leader change.
+func IsLeaderChange(err error) bool {
+	if err == ErrClientTSOStreamClosed {
+		return true
+	}
+	errMsg := err.Error()
+	return strings.Contains(errMsg, NotLeaderErr) ||
+		strings.Contains(errMsg, MismatchLeaderErr) ||
+		strings.Contains(errMsg, NotServedErr) ||
+		strings.Contains(errMsg, NotPrimaryErr)
+}
+
 // ZapError is used to make the log output easier.
 func ZapError(err error, causeError ...error) zap.Field {
 	if err == nil {
diff --git a/client/http/client.go b/client/http/client.go
index 0bf68d88df9..c90efcaa869 100644
--- a/client/http/client.go
+++ b/client/http/client.go
@@ -133,8 +133,8 @@ func (ci *clientInner) requestWithRetry(
 	)
 	execFunc := func() error {
 		defer func() {
-			// Handle some special status codes to increase the success rate of the following requests.
-			ci.handleHTTPStatusCode(statusCode)
+			// Handle some special status codes and errors to increase the success rate of the following requests.
+			ci.handleHTTPStatusCodeAndErr(statusCode, err)
 			log.Debug("[pd] http request finished", logFields...)
 		}()
 		// It will try to send the request to the PD leader first and then try to send the request to the other PD followers.
@@ -174,9 +174,10 @@ func (ci *clientInner) requestWithRetry(
 	return bo.Exec(ctx, execFunc)
 }
 
-func (ci *clientInner) handleHTTPStatusCode(code int) {
-	// If the status code is 503, it indicates that there may be PD leader/follower changes.
-	if code == http.StatusServiceUnavailable {
+func (ci *clientInner) handleHTTPStatusCodeAndErr(code int, err error) {
+	// - If the status code is 503, it indicates that there may be PD leader/follower changes.
+	// - If the error message contains the leader/primary change information, it indicates that there may be PD leader/primary change.
+	if code == http.StatusServiceUnavailable || errs.IsLeaderChange(err) {
 		ci.sd.ScheduleCheckMemberChanged()
 	}
 }
diff --git a/client/tso_dispatcher.go b/client/tso_dispatcher.go
index d5b52ad6039..0919fd84744 100644
--- a/client/tso_dispatcher.go
+++ b/client/tso_dispatcher.go
@@ -303,7 +303,7 @@ tsoBatchLoop:
 			cancel()
 			stream = nil
 			// Because ScheduleCheckMemberChanged is asynchronous, if the leader changes, we better call `updateMember` ASAP.
-			if IsLeaderChange(err) {
+			if errs.IsLeaderChange(err) {
 				if err := bo.Exec(ctx, svcDiscovery.CheckMemberChanged); err != nil {
 					select {
 					case <-ctx.Done():
diff --git a/errors.toml b/errors.toml
index 64101000478..a275cfa7501 100644
--- a/errors.toml
+++ b/errors.toml
@@ -21,6 +21,11 @@ error = '''
 redirect to not leader
 '''
 
+["PD:apiutil:ErrRedirectToNotPrimary"]
+error = '''
+redirect to not primary
+'''
+
 ["PD:autoscaling:ErrEmptyMetricsResponse"]
 error = '''
 metrics response from Prometheus is empty
diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go
index 8c3e914531b..8f67c59cfcc 100644
--- a/pkg/errs/errno.go
+++ b/pkg/errs/errno.go
@@ -195,10 +195,10 @@ var (
 
 // apiutil errors
 var (
-	ErrRedirect       = errors.Normalize("redirect failed", errors.RFCCodeText("PD:apiutil:ErrRedirect"))
-	ErrOptionNotExist = errors.Normalize("the option %s does not exist", errors.RFCCodeText("PD:apiutil:ErrOptionNotExist"))
-	// ErrRedirectToNotLeader is the error message for redirect to not leader.
-	ErrRedirectToNotLeader = errors.Normalize("redirect to not leader", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotLeader"))
+	ErrRedirect             = errors.Normalize("redirect failed", errors.RFCCodeText("PD:apiutil:ErrRedirect"))
+	ErrOptionNotExist       = errors.Normalize("the option %s does not exist", errors.RFCCodeText("PD:apiutil:ErrOptionNotExist"))
+	ErrRedirectToNotLeader  = errors.Normalize("redirect to not leader", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotLeader"))
+	ErrRedirectToNotPrimary = errors.Normalize("redirect to not primary", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotPrimary"))
 )
 
 // grpcutil errors
diff --git a/pkg/utils/apiutil/multiservicesapi/middleware.go b/pkg/utils/apiutil/multiservicesapi/middleware.go
index ed34ecc6afb..4343adcc981 100644
--- a/pkg/utils/apiutil/multiservicesapi/middleware.go
+++ b/pkg/utils/apiutil/multiservicesapi/middleware.go
@@ -48,8 +48,8 @@ func ServiceRedirector() gin.HandlerFunc {
 
 		// Prevent more than one redirection.
 		if name := c.Request.Header.Get(ServiceRedirectorHeader); len(name) != 0 {
-			log.Error("redirect but server is not primary", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirect))
-			c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirect.FastGenByArgs().Error())
+			log.Error("redirect but server is not primary", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirectToNotPrimary))
+			c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirectToNotPrimary.FastGenByArgs().Error())
 			return
 		}
 
diff --git a/pkg/utils/apiutil/serverapi/middleware.go b/pkg/utils/apiutil/serverapi/middleware.go
index 18dd2f52155..c360c964856 100755
--- a/pkg/utils/apiutil/serverapi/middleware.go
+++ b/pkg/utils/apiutil/serverapi/middleware.go
@@ -216,7 +216,7 @@ func (h *redirector) ServeHTTP(w http.ResponseWriter, r *http.Request, next http
 		r.Header.Set(apiutil.PDRedirectorHeader, h.s.Name())
 	} else {
 		// Prevent more than one redirection among PD/API servers.
-		log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", h.s.Name()), errs.ZapError(errs.ErrRedirect))
+		log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", h.s.Name()), errs.ZapError(errs.ErrRedirectToNotLeader))
 		http.Error(w, errs.ErrRedirectToNotLeader.FastGenByArgs().Error(), http.StatusInternalServerError)
 		return
 	}
diff --git a/server/apiv2/middlewares/redirector.go b/server/apiv2/middlewares/redirector.go
index 37c06de1585..9c2c4081175 100644
--- a/server/apiv2/middlewares/redirector.go
+++ b/server/apiv2/middlewares/redirector.go
@@ -43,8 +43,8 @@ func Redirector() gin.HandlerFunc {
 
 		// Prevent more than one redirection.
 		if name := c.Request.Header.Get(apiutil.PDRedirectorHeader); len(name) != 0 {
-			log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirect))
-			c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirect.FastGenByArgs().Error())
+			log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirectToNotLeader))
+			c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirectToNotLeader.FastGenByArgs().Error())
 			return
 		}
 
diff --git a/tests/integrations/client/client_test.go b/tests/integrations/client/client_test.go
index dfe7a6980c7..65acd897726 100644
--- a/tests/integrations/client/client_test.go
+++ b/tests/integrations/client/client_test.go
@@ -40,6 +40,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"github.com/stretchr/testify/suite"
 	pd "github.com/tikv/pd/client"
+	clierrs "github.com/tikv/pd/client/errs"
 	"github.com/tikv/pd/client/retry"
 	"github.com/tikv/pd/pkg/core"
 	"github.com/tikv/pd/pkg/errs"
@@ -528,7 +529,7 @@ func TestGlobalAndLocalTSO(t *testing.T) {
 	re.NotEmpty(cluster.WaitLeader())
 	_, _, err = cli.GetTS(ctx)
 	re.Error(err)
-	re.True(pd.IsLeaderChange(err))
+	re.True(clierrs.IsLeaderChange(err))
 	_, _, err = cli.GetTS(ctx)
 	re.NoError(err)
 	re.NoError(failpoint.Disable("github.com/tikv/pd/client/skipUpdateMember"))

From 541981cc21b2c6093847525e146b50c620aa8f1d Mon Sep 17 00:00:00 2001
From: JmPotato <ghzpotato@gmail.com>
Date: Thu, 30 May 2024 14:23:50 +0800
Subject: [PATCH 4/4] Fix the multierr bug

Signed-off-by: JmPotato <ghzpotato@gmail.com>
---
 client/errs/errs.go                           |  5 ++-
 client/go.mod                                 |  2 +-
 client/go.sum                                 |  3 +-
 client/http/client.go                         | 39 +++++++++---------
 client/http/request_info.go                   |  4 +-
 client/retry/backoff.go                       | 16 ++------
 client/retry/backoff_test.go                  |  2 +-
 tests/integrations/client/http_client_test.go | 41 +++++++++++++++++++
 8 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/client/errs/errs.go b/client/errs/errs.go
index ee29e4c4349..2c25e009849 100644
--- a/client/errs/errs.go
+++ b/client/errs/errs.go
@@ -22,8 +22,11 @@ import (
 	"go.uber.org/zap/zapcore"
 )
 
-// IsLeaderChange will determine whether there is a leader change.
+// IsLeaderChange reports whether the given error indicates a leader/primary change.
 func IsLeaderChange(err error) bool {
+	if err == nil {
+		return false
+	}
 	if err == ErrClientTSOStreamClosed {
 		return true
 	}
diff --git a/client/go.mod b/client/go.mod
index 89799796521..543f013fd11 100644
--- a/client/go.mod
+++ b/client/go.mod
@@ -16,7 +16,6 @@ require (
 	github.com/stretchr/testify v1.8.2
 	go.uber.org/atomic v1.10.0
 	go.uber.org/goleak v1.1.11
-	go.uber.org/multierr v1.11.0
 	go.uber.org/zap v1.24.0
 	golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4
 	google.golang.org/grpc v1.62.1
@@ -34,6 +33,7 @@ require (
 	github.com/prometheus/client_model v0.5.0 // indirect
 	github.com/prometheus/common v0.46.0 // indirect
 	github.com/prometheus/procfs v0.12.0 // indirect
+	go.uber.org/multierr v1.7.0 // indirect
 	golang.org/x/net v0.23.0 // indirect
 	golang.org/x/sys v0.18.0 // indirect
 	golang.org/x/text v0.14.0 // indirect
diff --git a/client/go.sum b/client/go.sum
index 54942bb0bb8..a26571171ad 100644
--- a/client/go.sum
+++ b/client/go.sum
@@ -88,9 +88,8 @@ go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A
 go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI=
 go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
 go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
+go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec=
 go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak=
-go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
-go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
 go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI=
 go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60=
 go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg=
diff --git a/client/http/client.go b/client/http/client.go
index c90efcaa869..3abd8828e28 100644
--- a/client/http/client.go
+++ b/client/http/client.go
@@ -124,18 +124,20 @@ func (ci *clientInner) requestWithRetry(
 		isLeader   bool
 		statusCode int
 		err        error
-		logFields  = append(reqInfo.logFields(),
-			zap.String("source", ci.source),
-			zap.String("server-url", serverURL),
-			zap.Bool("is-leader", isLeader),
-			zap.Int("status-code", statusCode),
-			zap.Error(err))
+		logFields  = append(reqInfo.logFields(), zap.String("source", ci.source))
 	)
 	execFunc := func() error {
 		defer func() {
-			// Handle some special status codes and errors to increase the success rate of the following requests.
-			ci.handleHTTPStatusCodeAndErr(statusCode, err)
-			log.Debug("[pd] http request finished", logFields...)
+			// - If the status code is 503, it indicates that there may be PD leader/follower changes.
+			// - If the error message contains the leader/primary change information, it indicates that there may be PD leader/primary change.
+			if statusCode == http.StatusServiceUnavailable || errs.IsLeaderChange(err) {
+				ci.sd.ScheduleCheckMemberChanged()
+			}
+			log.Info("[pd] http request finished", append(logFields,
+				zap.String("server-url", serverURL),
+				zap.Bool("is-leader", isLeader),
+				zap.Int("status-code", statusCode),
+				zap.Error(err))...)
 		}()
 		// It will try to send the request to the PD leader first and then try to send the request to the other PD followers.
 		clients := ci.sd.GetAllServiceClients()
@@ -154,7 +156,11 @@ func (ci *clientInner) requestWithRetry(
 			if err == nil || noNeedRetry(statusCode) {
 				return err
 			}
-			log.Debug("[pd] http request url failed", logFields...)
+			log.Info("[pd] http request url failed", append(logFields,
+				zap.String("server-url", serverURL),
+				zap.Bool("is-leader", isLeader),
+				zap.Int("status-code", statusCode),
+				zap.Error(err))...)
 		}
 		if skipNum == len(clients) {
 			return errs.ErrClientNoTargetMember
@@ -174,14 +180,6 @@ func (ci *clientInner) requestWithRetry(
 	return bo.Exec(ctx, execFunc)
 }
 
-func (ci *clientInner) handleHTTPStatusCodeAndErr(code int, err error) {
-	// - If the status code is 503, it indicates that there may be PD leader/follower changes.
-	// - If the error message contains the leader/primary change information, it indicates that there may be PD leader/primary change.
-	if code == http.StatusServiceUnavailable || errs.IsLeaderChange(err) {
-		ci.sd.ScheduleCheckMemberChanged()
-	}
-}
-
 func noNeedRetry(statusCode int) bool {
 	return statusCode == http.StatusNotFound ||
 		statusCode == http.StatusForbidden ||
@@ -245,11 +243,14 @@ func (ci *clientInner) doRequest(
 		if readErr != nil {
 			logFields = append(logFields, zap.NamedError("read-body-error", err))
 		} else {
+			bs = bytes.TrimSpace(bs)
 			logFields = append(logFields, zap.ByteString("body", bs))
 		}
 
 		log.Error("[pd] request failed with a non-200 status", logFields...)
-		return resp.StatusCode, errors.Errorf("request pd http api failed with status: '%s'", resp.Status)
+		return resp.StatusCode, errors.Errorf(
+			"request pd http api failed with status: '%s', body: '%s'", resp.Status, bs,
+		)
 	}
 
 	if res == nil {
diff --git a/client/http/request_info.go b/client/http/request_info.go
index b95c8ad7fa4..3fb91c6ca97 100644
--- a/client/http/request_info.go
+++ b/client/http/request_info.go
@@ -161,10 +161,10 @@ func (ri *requestInfo) getURL(addr string) string {
 
 func (ri *requestInfo) logFields() []zap.Field {
 	return []zap.Field{
-		zap.String("callerID", ri.callerID),
+		zap.String("caller-id", ri.callerID),
 		zap.String("name", ri.name),
 		zap.String("uri", ri.uri),
 		zap.String("method", ri.method),
-		zap.String("targetURL", ri.targetURL),
+		zap.String("target-url", ri.targetURL),
 	}
 }
diff --git a/client/retry/backoff.go b/client/retry/backoff.go
index 6c72b68ab9d..4f0a8eca925 100644
--- a/client/retry/backoff.go
+++ b/client/retry/backoff.go
@@ -24,12 +24,9 @@ import (
 	"github.com/pingcap/errors"
 	"github.com/pingcap/failpoint"
 	"github.com/pingcap/log"
-	"go.uber.org/multierr"
 	"go.uber.org/zap"
 )
 
-const maxRecordErrorCount = 20
-
 // Option is used to customize the backoffer.
 type Option func(*Backoffer)
 
@@ -69,18 +66,13 @@ func (bo *Backoffer) Exec(
 ) error {
 	defer bo.resetBackoff()
 	var (
-		allErrors error
-		err       error
-		after     *time.Timer
+		err   error
+		after *time.Timer
 	)
 	fnName := getFunctionName(fn)
 	for {
 		err = fn()
 		bo.attempt++
-		if bo.attempt < maxRecordErrorCount {
-			// multierr.Append will ignore nil error.
-			allErrors = multierr.Append(allErrors, err)
-		}
 		if !bo.isRetryable(err) {
 			break
 		}
@@ -100,7 +92,7 @@ func (bo *Backoffer) Exec(
 		select {
 		case <-ctx.Done():
 			after.Stop()
-			return multierr.Append(allErrors, errors.Trace(ctx.Err()))
+			return errors.Trace(ctx.Err())
 		case <-after.C:
 			failpoint.Inject("backOffExecute", func() {
 				testBackOffExecuteFlag = true
@@ -115,7 +107,7 @@ func (bo *Backoffer) Exec(
 			}
 		}
 	}
-	return allErrors
+	return err
 }
 
 // InitialBackoffer make the initial state for retrying.
diff --git a/client/retry/backoff_test.go b/client/retry/backoff_test.go
index 8dd44033b55..35f6fca43a7 100644
--- a/client/retry/backoff_test.go
+++ b/client/retry/backoff_test.go
@@ -87,7 +87,7 @@ func TestBackoffer(t *testing.T) {
 		return expectedErr
 	})
 	re.InDelta(total, time.Since(start), float64(250*time.Millisecond))
-	re.ErrorContains(err, "test; test; test; test")
+	re.ErrorContains(err, "test")
 	re.ErrorIs(err, expectedErr)
 	re.Equal(4, execCount)
 	re.True(isBackofferReset(bo))
diff --git a/tests/integrations/client/http_client_test.go b/tests/integrations/client/http_client_test.go
index fa109946e4b..b873e689354 100644
--- a/tests/integrations/client/http_client_test.go
+++ b/tests/integrations/client/http_client_test.go
@@ -21,6 +21,7 @@ import (
 	"net/url"
 	"sort"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 
@@ -757,3 +758,43 @@ func (suite *httpClientTestSuite) TestGetHealthStatus() {
 	re.Equal("pd2", healths[1].Name)
 	re.True(healths[0].Health && healths[1].Health)
 }
+
+func (suite *httpClientTestSuite) TestRetryOnLeaderChange() {
+	re := suite.Require()
+	ctx, cancel := context.WithCancel(suite.ctx)
+	defer cancel()
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		bo := retry.InitialBackoffer(100*time.Millisecond, time.Second, 0)
+		client := suite.client.WithBackoffer(bo)
+		for {
+			healths, err := client.GetHealthStatus(ctx)
+			if err != nil && strings.Contains(err.Error(), "context canceled") {
+				return
+			}
+			re.NoError(err)
+			re.Len(healths, 2)
+			select {
+			case <-ctx.Done():
+				return
+			default:
+			}
+		}
+	}()
+
+	leader := suite.cluster.GetLeaderServer()
+	re.NotNil(leader)
+	for i := 0; i < 3; i++ {
+		leader.ResignLeader()
+		re.NotEmpty(suite.cluster.WaitLeader())
+		leader = suite.cluster.GetLeaderServer()
+		re.NotNil(leader)
+	}
+
+	// Cancel the context to stop the goroutine.
+	cancel()
+	wg.Wait()
+}