-
Notifications
You must be signed in to change notification settings - Fork 349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
pfring_recv doesn't return while there is traffic #816
Comments
@chxd please update to latest stable (8.0) or dev (8.1) as we do not support old pf_ring versions. Thank you. |
I'm sorry for not mentioning that I'm also using 8.0.0,and the problem still exists. |
I can confirm this problem still persists on $ apt show pfring
Package: pfring
Version: 8.4.0-8225
...
Description: PF_RING (http://www.ntop.org/pf_ring/) [8.4.0-stable:cbf163637c6d7d49fac5baef71538cc559330ca2] I had created threaded solution that detects problem, logs it and successfully mitigates it. I'm not 100% happy with it because it still looses N seconds of traffic. For testing purposes, I had started 8 application instances on 8 different interfaces (much simplified, but representable code is below). In two weeks of uninterrupted running, these 8 apps logged detection and mitigation messages 5 times. All of them were able to successfully continue working after pfring "connection" was reset. All events that triggered these messages, happened on different times and on different interfaces. I hadn't been able to correlate these events to anything sensible on system running these apps. To make extra sure that my app code wasn't causing this by introducing it's own deadlocks, these 8 instances were specially built with all pthreads API calls replaced by "checked" calls similar to following: // foo_app_threading.h
#ifdef FOO_APP_CHECKED_THREADING
#define FOO_APP_MUTEX_INITIALIZER PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
#define FOO_APP_MUTEX_LOCK(x) foo_app_mutex_lock(x)
int foo_app_mutex_lock(pthread_mutex_t *mtx);
#else // FOO_APP_CHECKED_THREADING
#define FOO_APP_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#define FOO_APP_MUTEX_LOCK(x) pthread_mutex_lock(x)
#endif // FOO_APP_CHECKED_THREADING
// foo_app_threading.c
int foo_app_mutex_lock(pthread_mutex_t *m) {
int retv = pthread_mutex_trylock(m);
if (retv != 0) {
syslog(LOG_CRIT, "pthread_mutex_trylock failed, %d", retv);
abort();
}
return retv;
} During testing period, not a single log + abort related to app's threading code was detected on any of 8 interfaces. Finally, here is a minimal working code demonstrating how all of this works in my app. Basically, I'm running #include <pfring.h>
#include <pfring_ft.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/time.h>
#include <syslog.h>
#include <unistd.h>
static char *device = "wlp0s20f3";
static pfring *ring = NULL;
static pthread_t recv_loop;
static bool recv_loop_is_done = false;
static struct timeval last_recv_at;
static pthread_t monitoring_loop;
static bool monitoring_is_done = false;
static void *recv_loop_f(void *data);
static void *monitoring_loop_f(void *data);
static bool create_ring_handle();
static void close_monitoring_loop();
static void close_recv_loop_and_ring();
static bool app_is_done = false;
int main(int argc, char *argv[]) {
const char *app_name = "foo_app";
openlog(app_name, LOG_PID, LOG_USER);
create_ring_handle();
pthread_create(&recv_loop, NULL, recv_loop_f, NULL);
pthread_create(&monitoring_loop, NULL, monitoring_loop_f, NULL);
// Better solution for main thread waiting in production code, irrelevant for this
// example
while (!app_is_done) {
usleep(10000);
}
close_recv_loop_and_ring();
bool was_started = !monitoring_is_done;
monitoring_is_done = true;
if (was_started && recv_loop) { pthread_join(recv_loop, NULL); }
}
static bool create_ring_handle() {
uint32_t ring_flags = 0;
ring_flags |= PF_RING_PROMISC;
ring_flags |= PF_RING_TIMESTAMP;
ring = pfring_open(device, 10240, ring_flags);
pfring_set_application_name(ring, "foo_app");
pfring_set_direction(ring, rx_and_tx_direction);
pfring_set_socket_mode(ring, recv_only_mode);
pfring_enable_ring(ring);
}
static void close_recv_loop_and_ring() {
bool was_started = !recv_loop_is_done;
recv_loop_is_done = true;
if (was_started) {
// Allow some time for recv_loop thread to close
usleep(10);
}
if (ring) {
pfring_disable_ring(ring);
pfring_breakloop(ring);
pfring_close(ring);
}
if (was_started && recv_loop) { pthread_join(recv_loop, NULL); }
ring = NULL;
if (was_started) { syslog(LOG_INFO, "Closed PF_RING client!"); }
}
static pfring_ft_action
stub_process_packet(const uint8_t *packet, struct pfring_pkthdr *hdr) {
return PFRING_FT_ACTION_DEFAULT;
}
static void *recv_loop_f(void *data) {
bool is_done;
struct pfring_pkthdr ring_hdr;
uint8_t *buffer_p = NULL;
bool packet_received = false;
do {
is_done = recv_loop_is_done;
if (!is_done) {
packet_received = (pfring_recv(ring, &buffer_p, 0, &ring_hdr, 0) == 1);
if (packet_received) {
last_recv_at = ring_hdr.ts;
stub_process_packet(buffer_p, &ring_hdr);
}
}
} while (!is_done);
return NULL;
}
static void *monitoring_loop_f(void *data) {
bool is_done = false;
unsigned int duration_slept_useconds = 0;
static const unsigned int COMPARE_EVERY_USECONDS = 5 * 1000000UL; // 5s
static const unsigned int WARN_IF_NO_PACKETS_FOR_MILLISECONDS = 10 * 1000; // 10s
static const unsigned int SLEEP_FOR_USECONDS = 100000UL; // 0.1s
struct timespec now;
do {
is_done = monitoring_is_done;
if (!is_done) {
usleep(SLEEP_FOR_USECONDS);
duration_slept_useconds += SLEEP_FOR_USECONDS;
if (duration_slept_useconds >= COMPARE_EVERY_USECONDS) {
duration_slept_useconds = 0;
clock_gettime(CLOCK_REALTIME, &now);
unsigned int no_packets_for_useconds =
(now.tv_sec * 1000000UL + now.tv_nsec / 1000)
- (last_recv_at.tv_sec * 1000000UL + last_recv_at.tv_usec);
double no_packets_for_milliseconds = no_packets_for_useconds / 1000.0;
if (no_packets_for_milliseconds > WARN_IF_NO_PACKETS_FOR_MILLISECONDS) {
syslog(
LOG_WARNING,
"pfring_recv hasn't received any packets for %.02f! Will try to to restart "
"PF_RING client.",
no_packets_for_milliseconds
);
close_recv_loop_and_ring();
bool success = create_ring_handle()
&& (pthread_create(&recv_loop, NULL, recv_loop_f, NULL) == 0);
if (!success) {
syslog(LOG_CRIT, "Failed to re-start PF_RING client! Aborting.");
abort();
} else {
syslog(LOG_INFO, "Full reboot of PF_RING client successfully completed.");
}
}
}
}
} while (!is_done);
return NULL;
} |
@tadams42 thank you for your analysis, we will analyse the code asap |
Hi,
I'm using pf_ring 7.6.0 in my program to capture packets. The program almost definitely will stop working after running for several hours. I did some debugging and found that the program ran into a situation in which
tot_read == tot_insert
whileinsert_off != remove_off
. I think this is unexpected but can't figure out how it get there.I went over the issues list, but found no similar issues, so I created this one.
Here is some information I can provide:
Any help is appreciated, thanks.
The text was updated successfully, but these errors were encountered: