Skip to content

Commit

Permalink
chore(proxy): discard request context span during passthrough (#9882)
Browse files Browse the repository at this point in the history
## Problem

The RequestContext::span shouldn't live for the entire postgres
connection, only the handshake.

## Summary of changes

* Slight refactor to the RequestContext to discard the span upon
handshake completion.
* Make sure the temporary future for the handshake is dropped (not bound
to a variable)
* Runs our nightly fmt script
  • Loading branch information
conradludgate authored Nov 25, 2024
1 parent a74ab93 commit 96a1b71
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 57 deletions.
6 changes: 2 additions & 4 deletions proxy/src/cancellation.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::net::SocketAddr;
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;

use dashmap::DashMap;
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
use pq_proto::CancelKeyData;
use thiserror::Error;
use tokio::net::TcpStream;
Expand All @@ -17,9 +18,6 @@ use crate::rate_limiter::LeakyBucketRateLimiter;
use crate::redis::cancellation_publisher::{
CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
};
use std::net::IpAddr;

use ipnet::{IpNet, Ipv4Net, Ipv6Net};

pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
Expand Down
40 changes: 19 additions & 21 deletions proxy/src/console_redirect_proxy.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::sync::Arc;

use futures::TryFutureExt;
use futures::{FutureExt, TryFutureExt};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, Instrument};
Expand Down Expand Up @@ -88,40 +88,37 @@ pub async fn task_main(
crate::metrics::Protocol::Tcp,
&config.region,
);
let span = ctx.span();

let startup = Box::pin(
handle_client(
config,
backend,
&ctx,
cancellation_handler,
socket,
conn_gauge,
)
.instrument(span.clone()),
);
let res = startup.await;

let res = handle_client(
config,
backend,
&ctx,
cancellation_handler,
socket,
conn_gauge,
)
.instrument(ctx.span())
.boxed()
.await;

match res {
Err(e) => {
// todo: log and push to ctx the error kind
ctx.set_error_kind(e.get_error_kind());
error!(parent: &span, "per-client task finished with an error: {e:#}");
error!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
}
Ok(None) => {
ctx.set_success();
}
Ok(Some(p)) => {
ctx.set_success();
ctx.log_connect();
match p.proxy_pass().instrument(span.clone()).await {
let _disconnect = ctx.log_connect();
match p.proxy_pass().await {
Ok(()) => {}
Err(ErrorSource::Client(e)) => {
error!(parent: &span, "per-client task finished with an IO error from the client: {e:#}");
error!(?session_id, "per-client task finished with an IO error from the client: {e:#}");
}
Err(ErrorSource::Compute(e)) => {
error!(parent: &span, "per-client task finished with an IO error from the compute: {e:#}");
error!(?session_id, "per-client task finished with an IO error from the compute: {e:#}");
}
}
}
Expand Down Expand Up @@ -219,6 +216,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
client: stream,
aux: node.aux.clone(),
compute: node,
session_id: ctx.session_id(),
_req: request_gauge,
_conn: conn_gauge,
_cancel: session,
Expand Down
23 changes: 16 additions & 7 deletions proxy/src/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,11 +272,14 @@ impl RequestContext {
this.success = true;
}

pub fn log_connect(&self) {
self.0
.try_lock()
.expect("should not deadlock")
.log_connect();
pub fn log_connect(self) -> DisconnectLogger {
let mut this = self.0.into_inner();
this.log_connect();

// close current span.
this.span = Span::none();

DisconnectLogger(this)
}

pub(crate) fn protocol(&self) -> Protocol {
Expand Down Expand Up @@ -434,8 +437,14 @@ impl Drop for RequestContextInner {
fn drop(&mut self) {
if self.sender.is_some() {
self.log_connect();
} else {
self.log_disconnect();
}
}
}

pub struct DisconnectLogger(RequestContextInner);

impl Drop for DisconnectLogger {
fn drop(&mut self) {
self.0.log_disconnect();
}
}
44 changes: 21 additions & 23 deletions proxy/src/proxy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub(crate) mod wake_compute;
use std::sync::Arc;

pub use copy_bidirectional::{copy_bidirectional_client_compute, ErrorSource};
use futures::TryFutureExt;
use futures::{FutureExt, TryFutureExt};
use itertools::Itertools;
use once_cell::sync::OnceCell;
use pq_proto::{BeMessage as Be, StartupMessageParams};
Expand Down Expand Up @@ -123,42 +123,39 @@ pub async fn task_main(
crate::metrics::Protocol::Tcp,
&config.region,
);
let span = ctx.span();

let startup = Box::pin(
handle_client(
config,
auth_backend,
&ctx,
cancellation_handler,
socket,
ClientMode::Tcp,
endpoint_rate_limiter2,
conn_gauge,
)
.instrument(span.clone()),
);
let res = startup.await;

let res = handle_client(
config,
auth_backend,
&ctx,
cancellation_handler,
socket,
ClientMode::Tcp,
endpoint_rate_limiter2,
conn_gauge,
)
.instrument(ctx.span())
.boxed()
.await;

match res {
Err(e) => {
// todo: log and push to ctx the error kind
ctx.set_error_kind(e.get_error_kind());
warn!(parent: &span, "per-client task finished with an error: {e:#}");
warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
}
Ok(None) => {
ctx.set_success();
}
Ok(Some(p)) => {
ctx.set_success();
ctx.log_connect();
match p.proxy_pass().instrument(span.clone()).await {
let _disconnect = ctx.log_connect();
match p.proxy_pass().await {
Ok(()) => {}
Err(ErrorSource::Client(e)) => {
warn!(parent: &span, "per-client task finished with an IO error from the client: {e:#}");
warn!(?session_id, "per-client task finished with an IO error from the client: {e:#}");
}
Err(ErrorSource::Compute(e)) => {
error!(parent: &span, "per-client task finished with an IO error from the compute: {e:#}");
error!(?session_id, "per-client task finished with an IO error from the compute: {e:#}");
}
}
}
Expand Down Expand Up @@ -352,6 +349,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
client: stream,
aux: node.aux.clone(),
compute: node,
session_id: ctx.session_id(),
_req: request_gauge,
_conn: conn_gauge,
_cancel: session,
Expand Down
3 changes: 2 additions & 1 deletion proxy/src/proxy/passthrough.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub(crate) struct ProxyPassthrough<P, S> {
pub(crate) client: Stream<S>,
pub(crate) compute: PostgresConnection,
pub(crate) aux: MetricsAuxInfo,
pub(crate) session_id: uuid::Uuid,

pub(crate) _req: NumConnectionRequestsGuard<'static>,
pub(crate) _conn: NumClientConnectionsGuard<'static>,
Expand All @@ -69,7 +70,7 @@ impl<P, S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<P, S> {
pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> {
let res = proxy_pass(self.client, self.compute.stream, self.aux).await;
if let Err(err) = self.compute.cancel_closure.try_cancel_query().await {
tracing::warn!(?err, "could not cancel the query in the database");
tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
}
res
}
Expand Down
2 changes: 1 addition & 1 deletion proxy/src/redis/cancellation_publisher.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use core::net::IpAddr;
use std::sync::Arc;

use core::net::IpAddr;
use pq_proto::CancelKeyData;
use redis::AsyncCommands;
use tokio::sync::Mutex;
Expand Down

1 comment on commit 96a1b71

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

6941 tests run: 6631 passed, 2 failed, 308 skipped (full report)


Failures on Postgres 17

  • test_crafted_wal_end[vanilla-last_wal_record_crossing_segment]: debug-x86-64

Failures on Postgres 14

# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_pageserver_compaction_smoke[release-pg14-interpreted] or test_crafted_wal_end[debug-pg17-vanilla-last_wal_record_crossing_segment]"
Flaky tests (1)

Postgres 17

Test coverage report is not available

The comment gets automatically updated with the latest test results
96a1b71 at 2024-11-25T22:35:58.139Z :recycle:

Please sign in to comment.