Skip to content

Commit

Permalink
fdctl: add combined run and monitor command
Browse files Browse the repository at this point in the history
`fddev dev` gets an argument `--monitor` which combines
both running and monitoring in one command. Basically, it
spawns them each in a separate process, and wires stdout
from Firedancer to a pipe in the monitor so the log messages
can be cleanly written between output refreshes.
  • Loading branch information
mmcgee-jump committed Aug 17, 2023
1 parent c051359 commit 299034f
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 48 deletions.
4 changes: 2 additions & 2 deletions config/everything.mk
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ check-lint:
ifeq (run,$(firstword $(MAKECMDGOALS)))
RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
ifeq ($(RUN_ARGS),)
RUN_ARGS := dev
RUN_ARGS := dev --monitor
endif
$(eval $(RUN_ARGS):;@:)
endif
Expand All @@ -95,7 +95,7 @@ endif
cargo:

solana/target/release/libsolana_validator_fd.a: cargo
cd ./solana && ./cargo build --release -p solana-validator-fd
cd ./solana && env --unset=LDFLAGS ./cargo build --release -p solana-validator-fd

$(OBJDIR)/lib/libsolana_validator_fd.a: solana/target/release/libsolana_validator_fd.a
$(MKDIR) $(dir $@) && cp solana/target/release/libsolana_validator_fd.a $@
Expand Down
4 changes: 4 additions & 0 deletions src/app/fdctl/fdctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ typedef union {
long duration;
uint seed;
double ns_per_tic;
int drain_output_fd;
} monitor;
struct {
int command;
Expand All @@ -29,6 +30,9 @@ typedef union {
struct {
int tile;
} run1;
struct {
int monitor;
} dev;
} args_t;

typedef struct security security_t;
Expand Down
7 changes: 0 additions & 7 deletions src/app/fdctl/monitor/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,6 @@

/* TEXT_* are quick-and-dirty color terminal hacks. Probably should
do something more robust longer term. */
#define TEXT_ALTBUF_ENABLE "\033[?1049h"
#define TEXT_ALTBUF_DISABLE "\033[?1049l"
#define TEXT_CUP_HOME "\033[H"
#define TEXT_ED "\033[J"
#define TEXT_EL "\033[K"
#define TEXT_NEWLINE TEXT_EL "\n"

#define TEXT_NOCURSOR "\033[?25l"
#define TEXT_CURSOR "\033[?25h"

Expand Down
105 changes: 70 additions & 35 deletions src/app/fdctl/monitor/monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ void
monitor_cmd_args( int * pargc,
char *** pargv,
args_t * args ) {
args->monitor.dt_min = fd_env_strip_cmdline_long( pargc, pargv, "--dt-min", NULL, 6666667. );
args->monitor.dt_max = fd_env_strip_cmdline_long( pargc, pargv, "--dt-max", NULL, 133333333. );
args->monitor.duration = fd_env_strip_cmdline_long( pargc, pargv, "--duration", NULL, 0. );
args->monitor.seed = fd_env_strip_cmdline_uint( pargc, pargv, "--seed", NULL, (uint)fd_tickcount() );
args->monitor.ns_per_tic = 1./fd_tempo_tick_per_ns( NULL ); /* calibrate during init */
args->monitor.drain_output_fd = 0; /* only accessible to development commands, not the command line */
args->monitor.dt_min = fd_env_strip_cmdline_long( pargc, pargv, "--dt-min", NULL, 6666667. );
args->monitor.dt_max = fd_env_strip_cmdline_long( pargc, pargv, "--dt-max", NULL, 133333333. );
args->monitor.duration = fd_env_strip_cmdline_long( pargc, pargv, "--duration", NULL, 0. );
args->monitor.seed = fd_env_strip_cmdline_uint( pargc, pargv, "--seed", NULL, (uint)fd_tickcount() );
args->monitor.ns_per_tic = 1./fd_tempo_tick_per_ns( NULL ); /* calibrate during init */

if( FD_UNLIKELY( args->monitor.dt_min<0L ) ) FD_LOG_ERR(( "--dt-min should be positive" ));
if( FD_UNLIKELY( args->monitor.dt_max<args->monitor.dt_min ) ) FD_LOG_ERR(( "--dt-max should be at least --dt-min" ));
Expand Down Expand Up @@ -146,9 +147,29 @@ static int stop1 = 0;
#define FD_MONITOR_TEXT_BUF_SZ 16384
char buffer[ FD_MONITOR_TEXT_BUF_SZ ];

static void
drain_to_buffer( char ** buf,
ulong * buf_sz,
int fd ) {
while(1) {
long nread = read( fd, *buf, *buf_sz );
if( FD_LIKELY( nread == -1 && errno == EAGAIN ) ) break; /* no data available */
else if( FD_UNLIKELY( nread == -1 ) ) FD_LOG_ERR(( "read() failed (%d-%s)", errno, strerror( errno ) ));
*buf += nread;
*buf_sz -= (ulong)nread;

if( FD_UNLIKELY( *buf_sz == 0 ) ) {
write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ );
*buf = buffer;
*buf_sz = FD_MONITOR_TEXT_BUF_SZ;
}
}
}

void
run_monitor( config_t * const config,
const uchar ** pods,
int drain_output_fd,
long dt_min,
long dt_max,
long duration,
Expand Down Expand Up @@ -286,7 +307,7 @@ run_monitor( config_t * const config,
FD_LOG_NOTICE(( "monitoring --dt-min %li ns, --dt-max %li ns, --duration %li ns, --seed %u", dt_min, dt_max, duration, seed ));

long stop = then + duration;
if( duration == 0) stop = LONG_MAX;
if( duration == 0 ) stop = LONG_MAX;

#define PRINT( ... ) do { \
int n = snprintf( buf, buf_sz, __VA_ARGS__ ); \
Expand All @@ -295,9 +316,7 @@ run_monitor( config_t * const config,
buf += n; buf_sz -= (ulong)n; \
} while(0)

write_stdout( TEXT_ALTBUF_ENABLE, sizeof(TEXT_ALTBUF_ENABLE)-1 );
write_stdout( TEXT_NOCURSOR, sizeof(TEXT_NOCURSOR)-1 );

ulong line_count = 0;
for(;;) {
/* Wait a somewhat randomized amount and then make a diagnostic
snapshot */
Expand All @@ -310,17 +329,28 @@ run_monitor( config_t * const config,
/* Pretty print a comparison between this diagnostic snapshot and
the previous one. */

/* FIXME: CONSIDER DOING CNC ACKS AND INCL IN SNAPSHOT */
/* FIXME: CONSIDER INCLUDING TILE UPTIME */
/* FIXME: CONSIDER ADDING INFO LIKE PID OF INSTANCE */

char * buf = buffer;
ulong buf_sz = FD_MONITOR_TEXT_BUF_SZ;

/* move to beginning of line, n lines ago, then erase rest of screen */
PRINT( "\033[%luF\033[0J", line_count );

/* drain any firedancer log messages into the terminal */
if( FD_UNLIKELY( drain_output_fd ) > 0 ) drain_to_buffer( &buf, &buf_sz, drain_output_fd );
if( FD_UNLIKELY( buf_sz < FD_MONITOR_TEXT_BUF_SZ / 2 ) ) {
/* make sure there's enough space to print the whole monitor in one go */
write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - buf_sz );
buf = buffer;
buf_sz = FD_MONITOR_TEXT_BUF_SZ;
}

char * mon_start = buf;
if( FD_UNLIKELY( drain_output_fd ) > 0 ) PRINT("\n");

char now_cstr[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
PRINT( "snapshot for %s" TEXT_NEWLINE, fd_log_wallclock_cstr( now, now_cstr ) );
PRINT( " tile | stale | heart | sig | in backp | backp cnt | sv_filt cnt " TEXT_NEWLINE );
PRINT( "--------+------------+-------+------------+----------+---------------------+---------------------" TEXT_NEWLINE );
PRINT( "snapshot for %s\n", fd_log_wallclock_cstr( now, now_cstr ) );
PRINT( " tile | stale | heart | sig | in backp | backp cnt | sv_filt cnt \n" );
PRINT( "--------+------------+-------+------------+----------+---------------------+---------------------\n" );
for( ulong tile_idx=0UL; tile_idx<tile_cnt; tile_idx++ ) {
tile_snap_t * prv = &tile_snap_prv[ tile_idx ];
tile_snap_t * cur = &tile_snap_cur[ tile_idx ];
Expand All @@ -331,11 +361,11 @@ run_monitor( config_t * const config,
PRINT( " | " ); printf_err_bool( &buf, &buf_sz, cur->cnc_diag_in_backp, prv->cnc_diag_in_backp );
PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->cnc_diag_backp_cnt, prv->cnc_diag_backp_cnt );
PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->cnc_diag_sv_filt_cnt, prv->cnc_diag_sv_filt_cnt );
PRINT( TEXT_NEWLINE );
PRINT( "\n" );
}
PRINT( TEXT_NEWLINE );
PRINT( " link | tot TPS | tot bps | uniq TPS | uniq bps | ha tr%% | uniq bw%% | filt tr%% | filt bw%% | ovrnp cnt | ovrnr cnt | slow cnt" TEXT_NEWLINE );
PRINT( "----------------+----------+----------+----------+----------+----------+----------+----------+----------+---------------------+---------------------+---------------------" TEXT_NEWLINE );
PRINT( "\n" );
PRINT( " link | tot TPS | tot bps | uniq TPS | uniq bps | ha tr%% | uniq bw%% | filt tr%% | filt bw%% | ovrnp cnt | ovrnr cnt | slow cnt\n" );
PRINT( "----------------+----------+----------+----------+----------+----------+----------+----------+----------+---------------------+---------------------+---------------------\n" );
long dt = now-then;
for( ulong link_idx=0UL; link_idx<link_cnt; link_idx++ ) {
link_snap_t * prv = &link_snap_prv[ link_idx ];
Expand Down Expand Up @@ -363,26 +393,23 @@ run_monitor( config_t * const config,
PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_ovrnp_cnt, prv->fseq_diag_ovrnp_cnt );
PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_ovrnr_cnt, prv->fseq_diag_ovrnr_cnt );
PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_slow_cnt, prv->fseq_diag_slow_cnt );
PRINT( TEXT_NEWLINE );
PRINT( "\n" );
}

/* write entire monitor output buffer */
write_stdout( buffer, sizeof(buffer) - buf_sz );

if( FD_UNLIKELY( stop1 || (now-stop)>=0L ) ) {
/* Stop once we've been monitoring for duration ns */
write_stdout( TEXT_ALTBUF_DISABLE, sizeof(TEXT_ALTBUF_DISABLE)-1 );
write_stdout( buffer, sizeof(buffer) - buf_sz );
write_stdout( TEXT_CURSOR, sizeof(TEXT_CURSOR)-1 );
break;
}

/* clear terminal and write new buffer */
write_stdout( TEXT_ED
TEXT_CUP_HOME,
sizeof(TEXT_ED)-1
+ sizeof(TEXT_CUP_HOME)-1 );
write_stdout( buffer, sizeof(buffer) - buf_sz );

/* Still more monitoring to do ... wind up for the next iteration by
swaping the two snap arrays. */
line_count = 0;
for ( ulong i=(ulong)(mon_start-buffer); i<sizeof(buffer) - buf_sz; i++ ) {
if( buffer[i] == '\n' ) line_count++;
}

then = now; tic = toc;
tile_snap_t * tmp = tile_snap_prv; tile_snap_prv = tile_snap_cur; tile_snap_cur = tmp;
Expand All @@ -393,7 +420,7 @@ run_monitor( config_t * const config,
static void
signal1( int sig ) {
(void)sig;
stop1 = 1;
exit_group( 0 );
}

void
Expand Down Expand Up @@ -421,26 +448,34 @@ monitor_cmd_fn( args_t * args,
__NR_nanosleep, /* fd_log_wait_until */
__NR_sched_yield, /* fd_log_wait_until */
__NR_exit_group, /* exit process */
__NR_rt_sigreturn, /* return from signal handler */
__NR_read, /* read from firedancer stdout to interpose their log messages */
};

int allow_fds[] = {
1, /* stdout */
2, /* stderr */
3, /* logfile */
args->monitor.drain_output_fd, /* maybe we are interposing firedancer log output with the monitor */
};

ulong num_fds = sizeof(allow_fds)/sizeof(allow_fds[0]);
ushort num_syscalls = sizeof(allow_syscalls)/sizeof(allow_syscalls[0]);

ulong allow_fds_sz = args->monitor.drain_output_fd >= 0 ? num_fds : num_fds - 1;
ushort allow_syscalls_sz = args->monitor.drain_output_fd >= 0 ? num_syscalls : (ushort)(num_syscalls - 1);

if( FD_UNLIKELY( close( 0 ) ) ) FD_LOG_ERR(( "close(0) failed (%i-%s)", errno, strerror( errno ) ));
if( config->development.sandbox )
fd_sandbox( config->uid,
config->gid,
sizeof(allow_fds)/sizeof(allow_fds[ 0 ]),
allow_fds_sz,
allow_fds,
sizeof(allow_syscalls)/sizeof(allow_syscalls[0]),
allow_syscalls_sz,
allow_syscalls );

run_monitor( config,
pods,
args->monitor.drain_output_fd,
args->monitor.dt_min,
args->monitor.dt_max,
args->monitor.duration,
Expand Down
4 changes: 4 additions & 0 deletions src/app/fdctl/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ main_pid_namespace( void * args ) {
bringing all of our processes down as a group. */
int wstatus;
pid_t exited_pid = wait4( -1, &wstatus, (int)__WCLONE, NULL );
if( FD_UNLIKELY( exited_pid == -1 ) ) {
fd_log_private_fprintf_0( STDERR_FILENO, "wait4() failed (%i-%s)", errno, strerror( errno ) );
exit_group( 1 );
}

char * name = "unknown";
ulong tile_idx = ULONG_MAX;
Expand Down
93 changes: 90 additions & 3 deletions src/app/fddev/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@
#include <stdio.h>
#include <unistd.h>
#include <sched.h>
#include <fcntl.h>
#include <sys/wait.h>

void
dev_cmd_args( int * pargc,
char *** pargv,
args_t * args ) {
args->dev.monitor = fd_env_strip_cmdline_contains( pargc, pargv, "--monitor" );
}

void
dev_cmd_perm( args_t * args,
security_t * security,
Expand All @@ -27,11 +35,36 @@ dev_cmd_perm( args_t * args,
run_cmd_perm( NULL, security, config );
}

pid_t firedancer_pid, monitor_pid;
extern char fd_log_private_path[ 1024 ]; /* empty string on start */

static void
parent_signal( int sig ) {
(void)sig;
int err = 0;
if( FD_LIKELY( firedancer_pid ) )
if( kill( firedancer_pid, SIGINT ) ) err = 1;
if( FD_LIKELY( monitor_pid ) )
if( kill( monitor_pid, SIGKILL ) ) err = 1;
fd_log_private_fprintf_0( STDERR_FILENO, "Log at \"%s\"\n", fd_log_private_path );
exit_group( err );
}

static void
install_parent_signals( void ) {
struct sigaction sa = {
.sa_handler = parent_signal,
.sa_flags = 0,
};
if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) )
FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, strerror( errno ) ));
if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) )
FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, strerror( errno ) ));
}

void
dev_cmd_fn( args_t * args,
config_t * const config ) {
(void)args;

args_t configure_args = {
.configure.command = CONFIGURE_CMD_INIT,
};
Expand All @@ -54,5 +87,59 @@ dev_cmd_fn( args_t * args,
leave_network_namespace();
}

run_firedancer( config );
if( FD_LIKELY( !args->dev.monitor ) ) run_firedancer( config );
else {
install_parent_signals();

int pipefd[2];
if( FD_UNLIKELY( pipe2( pipefd, O_NONBLOCK ) ) ) FD_LOG_ERR(( "pipe2() failed (%d-%s)", errno, strerror( errno ) ));

firedancer_pid = fork();
if( !firedancer_pid ) {
if( FD_UNLIKELY( close( pipefd[0] ) ) ) FD_LOG_ERR(( "close() failed (%d-%s)", errno, strerror( errno ) ));
if( FD_UNLIKELY( dup2( pipefd[1], STDERR_FILENO ) == -1 ) ) FD_LOG_ERR(( "dup2() failed (%d-%s)", errno, strerror( errno ) ));
if( FD_UNLIKELY( close( pipefd[1] ) ) ) FD_LOG_ERR(( "close() failed (%d-%s)", errno, strerror( errno ) ));
if( FD_UNLIKELY( setenv( "RUST_LOG_STYLE", "always", 1 ) ) ) /* otherwise RUST_LOG will not be colorized to the pipe */
FD_LOG_ERR(( "setenv() failed (%d-%s)", errno, strerror( errno ) ));
run_firedancer( config );
} else {
if( FD_UNLIKELY( close( pipefd[1] ) ) ) FD_LOG_ERR(( "close() failed (%d-%s)", errno, strerror( errno ) ));
}

args_t monitor_args;
int argc = 0;
char * argv[] = { NULL };
char ** pargv = (char**)argv;
monitor_cmd_args( &argc, &pargv, &monitor_args );
monitor_args.monitor.drain_output_fd = pipefd[0];

monitor_pid = fork();
if( !monitor_pid ) monitor_cmd_fn( &monitor_args, config );
if( FD_UNLIKELY( close( pipefd[0] ) ) ) FD_LOG_ERR(( "close() failed (%d-%s)", errno, strerror( errno ) ));

int wstatus;
pid_t exited_pid = wait4( -1, &wstatus, (int)__WALL, NULL );
if( FD_UNLIKELY( exited_pid == -1 ) ) {
fd_log_private_fprintf_0( STDERR_FILENO, "wait4() failed (%i-%s)", errno, strerror( errno ) );
exit_group( 1 );
}

char * exited_child = exited_pid == firedancer_pid ? "firedancer" : exited_pid == monitor_pid ? "monitor" : "unknown";
int exit_code = 0;
if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) {
FD_LOG_ERR(( "%s exited unexpectedly with signal %d (%s)", exited_child, WTERMSIG( wstatus ), strsignal( WTERMSIG( wstatus ) ) ));
exit_code = WTERMSIG( wstatus );
} else {
FD_LOG_ERR(( "%s exited unexpectedly with code %d", exited_child, WEXITSTATUS( wstatus ) ));
if( FD_UNLIKELY( exited_pid == monitor_pid && !WEXITSTATUS( wstatus ) ) ) exit_code = EXIT_FAILURE;
else exit_code = WEXITSTATUS( wstatus );
}

if( FD_UNLIKELY( exited_pid == monitor_pid ) ) {
if( FD_UNLIKELY( kill( firedancer_pid, SIGKILL ) ) ) FD_LOG_ERR(( "failed to kill all processes (%d-%s)", errno, strerror( errno ) ));
} else {
if( FD_UNLIKELY( kill( monitor_pid, SIGKILL ) ) ) FD_LOG_ERR(( "failed to kill all processes (%d-%s)", errno, strerror( errno ) ));
}
exit_group( exit_code );
}
}
5 changes: 5 additions & 0 deletions src/app/fddev/fddev.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

#include "../fdctl/fdctl.h"

void
dev_cmd_args( int * pargc,
char *** pargv,
args_t * args );

void
dev_cmd_perm( args_t * args,
security_t * security,
Expand Down
Loading

0 comments on commit 299034f

Please sign in to comment.