Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sampling service #302

Merged
merged 8 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 55 additions & 6 deletions xprof/xprof.rb.in
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ end

# Tells whether this process is the one that should drive sampling.
#
# Returns false when the :sample option is not set; otherwise returns
# whatever mpi_local_master? returns.
def sampling?
  return false unless OPTIONS[:sample]

  mpi_local_master?
end

def env_tracers
Expand Down Expand Up @@ -732,9 +732,51 @@ def gm_rename_folder
thapi_trace_dir_root
end

# Manages the external `sampling_daemon` helper process: spawns it, asks it
# to finish, and blocks on a "ready" signal after each of those two steps.
class SamplingDaemon
  # Hard-coded real-time signal base.
  # NOTE(review): presumably matches the value the sampling_daemon binary
  # uses (34 is SIGRTMIN on Linux/glibc) -- confirm against the daemon source.
  SIGRTMIN = 34
  # Signal this process waits for after starting / finalizing the daemon.
  SIG_SAMPLING_READY = SIGRTMIN
  # Signal sent to the daemon to ask it to finish.
  SIG_SAMPLING_FINISH = SIGRTMIN + 1

  # PID of the spawned daemon, or nil when it has not been started.
  attr_reader :pid

  def initialize
    @pid = nil
  end

  # Spawns the daemon located next to this file and blocks until the READY
  # signal is received. Does nothing when sampling? is false.
  #
  # parent_pid - PID handed to the daemon as its only command-line argument.
  #
  # Raises RuntimeError when the daemon binary does not exist.
  def start(parent_pid)
    return unless sampling?

    daemon_path = "#{__dir__}/sampling_daemon"
    raise "No sampling_daemon binary found at #{daemon_path}" unless File.exist?(daemon_path)

    wait_for_ready_signal do
      # argv form: no shell and no word splitting, so an install path that
      # contains spaces or shell metacharacters still works.
      @pid = spawn(daemon_path, parent_pid.to_s)
      Process.detach(@pid)
    end
  end

  # Sends the FINISH signal to the daemon and blocks until the READY signal
  # is received again. Does nothing when the daemon was never started.
  def finalize
    return unless @pid

    wait_for_ready_signal { Process.kill(SIG_SAMPLING_FINISH, @pid) }
  end

  private

  # Installs the READY handler, then runs the optional block, then polls
  # until the handler has fired.
  #
  # The handler must be armed BEFORE the action that provokes the signal:
  # an untrapped real-time signal terminates the process by default, so a
  # READY that arrives before the trap is in place would kill us.
  #
  # NOTE(review): the wait is unbounded; if READY is never delivered this
  # loops forever.
  def wait_for_ready_signal
    received_ready = false
    Signal.trap(SIG_SAMPLING_READY) { received_ready = true }
    yield if block_given?
    sleep(0.1) until received_ready # Wait until READY signal is received
  end
end

# Start and stop LTTng, and do the on-node analysis
def trace_and_on_node_processing(usr_argv)
def teardown_lttng(syncd, pids)
def teardown_lttng(syncd, sampling_daemon, pids)
# We need to be sure that all the local ranks are finished
syncd.local_barrier('waiting_for_application_ending')

Expand All @@ -754,9 +796,11 @@ def trace_and_on_node_processing(usr_argv)
end
# we can kill the session daemon
lm_lttng_kill_sessiond
sampling_daemon&.finalize
Kerilk marked this conversation as resolved.
Show resolved Hide resolved
end

SyncDaemon.open do |syncd|
sampling_daemon = nil
# Load Tracers and APILoaders Lib
backends, h = env_tracers

Expand All @@ -767,19 +811,24 @@ def trace_and_on_node_processing(usr_argv)
pids = if mpi_local_master?
lm_setup_lttng(backends)
lm_babeltrace(backends) if OPTIONS[:archive]
end
sbekele81 marked this conversation as resolved.
Show resolved Hide resolved
end

syncd.local_barrier('waiting_for_lttng_setup')

if sampling?
sampling_daemon = SamplingDaemon.new
sampling_daemon&.start(Process.pid)
end

# Launch User Command
begin
XprofExitCode.update(launch_usr_bin(h, usr_argv), usr_argv.join(' '))
rescue Errno::ENOENT
teardown_lttng(syncd, pids)
teardown_lttng(syncd, sampling_daemon, pids)
raise
end

teardown_lttng(syncd, pids)
teardown_lttng(syncd, sampling_daemon, pids)
return unless mpi_local_master?

# Preprocess trace
Expand Down
21 changes: 15 additions & 6 deletions ze/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ EXTRA_DIST += \
ze_model.rb \
gen_babeltrace_ze_model.rb


ZE_PROBES = $(ZE_NAMESPACES:=_tracepoints) $(ZE_STRUCTS_NAMESPACES:=_tracepoints)
ZE_PROBES_TP = $(ZE_PROBES:=.tp)
ZE_PROBES_INCL = $(ZE_PROBES:=.h)
Expand Down Expand Up @@ -148,14 +149,26 @@ BUILT_SOURCES = \
$(ZE_PROBES_INCL) \
$(ZE_STATIC_PROBES_INCL)

bin_PROGRAMS = sampling_daemon

sampling_daemon_SOURCES = sampling_daemon.c

nodist_sampling_daemon_SOURCES = \
$(ZE_PROBES_INCL) \
$(ZE_STATIC_PROBES_INCL)

sampling_daemon_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(top_srcdir)/ze/include -I./
sampling_daemon_CFLAGS = -Wall -Wextra $(WERROR) $(LTTNG_UST_CFLAGS)
sampling_daemon_LDADD = libzetracepoints.la -ldl -lpthread $(LTTNG_UST_LIBS) ../sampling/libThapiSampling.la

tracer_ze.c: $(srcdir)/gen_ze.rb $(srcdir)/tracer_ze_helpers.include.c $(srcdir)/ze.h.include $(ZE_MODEL) $(ZE_PROBES_INCL) $(ZE_STATIC_PROBES_INCL)
SRC_DIR=$(srcdir) $(RUBY) $< > $@

EXTRA_DIST += \
gen_ze.rb \
tracer_ze_helpers.include.c

CLEANFILES += tracer_ze.c
CLEANFILES += tracer_ze.c sampling_daemon

bin_SCRIPTS = \
tracer_ze.sh
Expand All @@ -172,11 +185,7 @@ libzetracepoints_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/inclu
libzetracepoints_la_CFLAGS = -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Wno-sign-compare $(WERROR) $(LTTNG_UST_CFLAGS)
libzetracepoints_la_LDFLAGS = $(LTTNG_UST_LIBS)

zedir = $(pkglibdir)/ze
ze_LTLIBRARIES = libze_loader.la

bt2dir = $(pkglibdir)/bt2
bt2_LTLIBRARIES = libZEInterval.la
sbekele81 marked this conversation as resolved.
Show resolved Hide resolved
lib_LTLIBRARIES = libze_loader.la libZEInterval.la

nodist_libze_loader_la_SOURCES = \
$(ZE_PROBES_INCL) \
Expand Down
Loading
Loading