Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync-server: Fix infinite loop caused by accept error #271

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/sync/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#[cfg(unix)]
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::time::Duration;

use protobuf::{CodedInputStream, Message};
use std::collections::HashMap;
Expand All @@ -40,6 +41,7 @@ use crate::{MethodHandler, TtrpcContext};
// Initial values for `Server::thread_count_default`, `thread_count_min` and
// `thread_count_max`, as assigned in `Default for Server`.
const DEFAULT_WAIT_THREAD_COUNT_DEFAULT: usize = 3;
const DEFAULT_WAIT_THREAD_COUNT_MIN: usize = 1;
const DEFAULT_WAIT_THREAD_COUNT_MAX: usize = 5;
// Initial value for `Server::accept_retry_interval`: how long the listener
// sleeps after `accept` fails with a resource-limit error before retrying.
const DEFAULT_ACCEPT_RETRY_INTERVAL: Duration = Duration::from_secs(10);

type MessageSender = Sender<(MessageHeader, Vec<u8>)>;
type MessageReceiver = Receiver<(MessageHeader, Vec<u8>)>;
Expand All @@ -57,6 +59,7 @@ pub struct Server {
thread_count_default: usize,
thread_count_min: usize,
thread_count_max: usize,
accept_retry_interval: Duration,
}

struct Connection {
Expand Down Expand Up @@ -244,6 +247,7 @@ impl Default for Server {
thread_count_default: DEFAULT_WAIT_THREAD_COUNT_DEFAULT,
thread_count_min: DEFAULT_WAIT_THREAD_COUNT_MIN,
thread_count_max: DEFAULT_WAIT_THREAD_COUNT_MAX,
accept_retry_interval: DEFAULT_ACCEPT_RETRY_INTERVAL,
}
}
}
Expand Down Expand Up @@ -305,6 +309,11 @@ impl Server {
self
}

/// Sets how long the listener sleeps before retrying after `accept` fails
/// with a resource-limit error (see `is_resource_limit_error`).
///
/// Takes the server by value and returns it, so the call can be chained
/// with the other builder-style setters. When this is never called, the
/// interval is `DEFAULT_ACCEPT_RETRY_INTERVAL`.
pub fn set_accept_retry_interval(mut self, interval: Duration) -> Server {
self.accept_retry_interval = interval;
self
}

pub fn start_listen(&mut self) -> Result<()> {
let connections = self.connections.clone();

Expand All @@ -320,6 +329,7 @@ impl Server {
let min = self.thread_count_min;
let max = self.thread_count_max;
let listener_quit_flag = self.listener_quit_flag.clone();
let accept_retry_interval = self.accept_retry_interval;

let reaper_tx = match self.reaper.take() {
None => {
Expand Down Expand Up @@ -373,6 +383,14 @@ impl Server {
}
Err(e) => {
error!("listener accept got {:?}", e);

// Resource limit errors can't be recovered from quickly, and because poll(2)
// is level-triggered, an uncorrected error would make this loop spin forever.
// So we sleep for a while and wait for the error to be corrected.
if is_resource_limit_error(e) {
thread::sleep(accept_retry_interval);
}

continue;
}
};
Expand Down Expand Up @@ -597,3 +615,11 @@ fn quit_connection(quit: Arc<AtomicBool>, control_tx: SyncSender<()>) {
.send(())
.unwrap_or_else(|err| debug!("Failed to send {:?}", err));
}

/// Reports whether `e` carries an OS error code that signals resource
/// exhaustion: `EMFILE`, `ENFILE`, `ENOBUFS` or `ENOMEM`.
///
/// Errors that have no underlying OS error code are never treated as
/// resource-limit errors.
fn is_resource_limit_error(e: std::io::Error) -> bool {
    match e.raw_os_error() {
        Some(code) => {
            code == libc::EMFILE
                || code == libc::ENFILE
                || code == libc::ENOBUFS
                || code == libc::ENOMEM
        }
        None => false,
    }
}
Loading