Skip to content

Commit

Permalink
wait before profiling newly created containers
Browse files Browse the repository at this point in the history
  • Loading branch information
slicklash committed Jun 17, 2024
1 parent 86995b1 commit fef1e28
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 93 deletions.
9 changes: 8 additions & 1 deletion gprofiler/containers_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
from typing import Dict, List, Optional, Set

from granulate_utils.containers.client import ContainersClient
Expand All @@ -25,6 +26,8 @@

logger = get_logger_adapter(__name__)

NEWLY_CREATED_CONTAINER_AGE_IN_SECONDS = 3


class ContainerNamesClient:
def __init__(self) -> None:
Expand Down Expand Up @@ -73,9 +76,13 @@ def get_container_name(self, pid: int) -> str:
def _safely_get_process_container_name(self, pid: int) -> Optional[str]:
try:
try:
container_id = get_process_container_id(Process(pid))
process = Process(pid)
container_id = get_process_container_id(process)
if container_id is None:
return None
# If the container is newly created, we wait a bit to make sure the container is available
if time.time() - process.create_time() <= NEWLY_CREATED_CONTAINER_AGE_IN_SECONDS:
time.sleep(2)
except NoSuchProcess:
return None
return self._get_container_name(container_id)
Expand Down
2 changes: 1 addition & 1 deletion gprofiler/profilers/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,7 @@ def _check_async_profiler_loaded(self, process: Process) -> bool:
def _profile_process(self, process: Process, duration: int, spawned: bool) -> ProfileData:
comm = process_comm(process)
exe = process_exe(process)
container_name = self._profiler_state.get_container_name(process.pid)
java_version_output: Optional[str] = get_java_version_logged(process, self._profiler_state.stop_event)

if self._enabled_proc_events_java:
Expand Down Expand Up @@ -1258,7 +1259,6 @@ def _profile_process(self, process: Process, duration: int, spawned: bool) -> Pr
self._profiled_pids.add(process.pid)

logger.info(f"Profiling{' spawned' if spawned else ''} process {process.pid} with async-profiler")
container_name = self._profiler_state.get_container_name(process.pid)
app_metadata = self._metadata.get_metadata(process)
appid = application_identifiers.get_java_app_id(process, self._collect_spark_app_name)

Expand Down
3 changes: 2 additions & 1 deletion gprofiler/profilers/php.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,11 @@ def extract_metadata_section(re_expr: Pattern, metadata_line: str) -> str:
if profiler_state.processes_to_profile is not None:
if pid not in [process.pid for process in profiler_state.processes_to_profile]:
continue
container_name = profiler_state.get_container_name(pid)
# TODO: appid & app metadata for php!
appid = None
app_metadata = None
profiles[pid] = ProfileData(results[pid], appid, app_metadata, profiler_state.get_container_name(pid))
profiles[pid] = ProfileData(results[pid], appid, app_metadata, container_name)

return profiles

Expand Down
2 changes: 1 addition & 1 deletion gprofiler/profilers/python_ebpf.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,9 @@ def snapshot(self) -> ProcessToProfileData:
if self._profiler_state.processes_to_profile is not None:
if process not in self._profiler_state.processes_to_profile:
continue
container_name = self._profiler_state.get_container_name(pid)
appid = application_identifiers.get_python_app_id(process)
app_metadata = self._metadata.get_metadata(process)
container_name = self._profiler_state.get_container_name(pid)
except NoSuchProcess:
appid = None
app_metadata = None
Expand Down
49 changes: 2 additions & 47 deletions gprofiler/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,45 +90,7 @@ def is_root() -> bool:
return os.geteuid() == 0


libc: Optional[ctypes.CDLL] = None


def prctl(*argv: Any) -> int:
    """Invoke libc's ``prctl`` with the given positional arguments.

    ``libc.so.6`` is loaded on the first call (with ``use_errno=True`` so
    that ``ctypes.get_errno()`` can be consulted afterwards) and the handle
    is cached in the module-level ``libc`` for subsequent calls.

    :param argv: arguments forwarded unchanged to ``libc.prctl``.
    :return: whatever ``libc.prctl`` returned, typed as ``int``.
    """
    global libc
    if libc is None:
        # Lazy load: the library handle is only created when first needed.
        libc = ctypes.CDLL("libc.so.6", use_errno=True)
    result = libc.prctl(*argv)
    # ``cast`` is a typing-only hint; the value is returned as-is.
    return cast(int, result)


PR_SET_PDEATHSIG = 1


def set_child_termination_on_parent_death() -> int:
    """Request SIGTERM delivery to the calling process when its parent dies.

    Issues ``prctl(PR_SET_PDEATHSIG, SIGTERM)``. On failure (non-zero return)
    a warning with the errno and its description is logged; no exception is
    raised.

    :return: the return value of the ``prctl`` call (0 on success).
    """
    status = prctl(PR_SET_PDEATHSIG, signal.SIGTERM)
    if status == 0:
        return status

    # prctl failed: report the reason but let the caller decide what to do.
    errno = ctypes.get_errno()
    logger.warning(
        f"Failed to set parent-death signal on child process. errno: {errno}, strerror: {os.strerror(errno)}"
    )
    return status


def wrap_callbacks(callbacks: List[Callable]) -> Callable:
    """Combine several zero-argument callbacks into a single callable.

    The returned wrapper calls each callback in order and returns the return
    value of the last callback that was actually invoked (``None`` if none
    were invoked).

    ``None`` entries are skipped rather than called, so an optional callback
    (e.g. a ``preexec_fn`` explicitly passed as ``None``) can be placed in
    the list without the wrapper raising ``TypeError``.

    :param callbacks: callables taking no arguments; entries may be ``None``.
    :return: a zero-argument callable that runs all the callbacks.
    """

    def wrapper() -> Any:
        ret = None
        for cb in callbacks:
            if cb is None:
                # Nothing to run for a disabled/absent callback.
                continue
            ret = cb()

        return ret

    return wrapper


def start_process(
cmd: Union[str, List[str]], via_staticx: bool = False, term_on_parent_death: bool = True, **kwargs: Any
) -> Popen:
def start_process(cmd: Union[str, List[str]], via_staticx: bool = False, **kwargs: Any) -> Popen:
if isinstance(cmd, str):
cmd = [cmd]

Expand All @@ -150,19 +112,12 @@ def start_process(
env = env if env is not None else os.environ.copy()
env.update({"LD_LIBRARY_PATH": ""})

if is_windows():
cur_preexec_fn = None # preexec_fn is not supported on Windows platforms. subprocess.py reports this.
else:
cur_preexec_fn = kwargs.pop("preexec_fn", os.setpgrp)
if term_on_parent_death:
cur_preexec_fn = wrap_callbacks([set_child_termination_on_parent_death, cur_preexec_fn])

popen = Popen(
cmd,
stdout=kwargs.pop("stdout", subprocess.PIPE),
stderr=kwargs.pop("stderr", subprocess.PIPE),
stdin=subprocess.PIPE,
preexec_fn=cur_preexec_fn,
start_new_session=is_linux(), # TODO: change to "process_group" after upgrade to Python 3.11+
env=env,
**kwargs,
)
Expand Down
Loading

0 comments on commit fef1e28

Please sign in to comment.