Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Remove set_scorep_env #19

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
pip install --upgrade setuptools
pip install scorep
pip install jupyter_kernel_test
pip install pyyaml dill numpy pandas
pip install pyyaml dill cloudpickle numpy pandas

- name: Build Score-P Python kernel
run: |
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ You can select the kernel in Jupyter as `scorep-python`.

## Configuring Score-P in Jupyter

`%%scorep_env`
Set up your Score-P environment with the `%env` line magic.

Set up your Score-P environment. For a documentation of Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html).
For documentation of the Score-P environment variables, see [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html).

![](doc/scorep_setup.png)

Expand Down Expand Up @@ -127,7 +127,7 @@ Enables the write mode and starts the marking process. Subsequently, "running" c
Stops the marking process and writes the marked cells to a Python script. Additionally, a bash script will be created for setting the Score-P environment variables, passing the Python bindings arguments, and executing the Python script.

**Hints**:
- Recording a cell containing `%%scorep_env` or `%%scorep_python_binding_arguments` will add the environment variables/Score-P Python bindings to the bash script.
- Recording a cell containing `%%scorep_python_binding_arguments` will add the Score-P Python bindings parameters to the bash script.

- Code of a cell which is not to be executed with Score-P (not inside the multicell mode and without `%%execute_with_scorep`) will be framed with `with scorep.instrumenter.disable()` in the Python script to prevent instrumentation.

Expand Down
Binary file modified doc/scorep_setup.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta:__legacy__"

[project]
name='scorep-jupyter'
version='0.4.0'
version='0.6.0'
authors=[
{name='Elias Werner',email='[email protected]'},
]
Expand All @@ -24,7 +24,8 @@ dependencies = [
"jupyter-client",
"astunparse",
"scorep",
"dill"
"dill",
"cloudpickle"
]

[project.urls]
Expand Down
134 changes: 39 additions & 95 deletions src/scorep_jupyter/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,10 @@
import os
import subprocess
import re
import json
from scorep_jupyter.userpersistence import extract_definitions, extract_variables_names
from scorep_jupyter.userpersistence import PersHelper, scorep_script_name

PYTHON_EXECUTABLE = sys.executable
READ_CHUNK_SIZE = 8
userpersistence_token = "scorep_jupyter.userpersistence"
scorep_script_name = "scorep_script.py"
jupyter_dump = "jupyter_dump.pkl"
subprocess_dump = "subprocess_dump.pkl"


class ScorepPythonKernel(IPythonKernel):
implementation = 'Python and Score-P'
Expand All @@ -30,9 +24,6 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)

self.scorep_binding_args = []
self.scorep_env = {}

self.user_variables = set()

self.multicellmode = False
self.multicellmode_cellcount = 0
Expand All @@ -42,13 +33,14 @@ def __init__(self, **kwargs):
self.writemode_filename = 'jupyter_to_script'
self.writemode_multicell = False
self.writemode_scorep_binding_args = []
self.writemode_scorep_env = []
# TODO: reset variables after each finalize writefile?
self.bash_script_filename = ""
self.python_script_filename = ""
self.bash_script = None
self.python_script = None

self.pershelper = PersHelper('dill')

def cell_output(self, string, stream='stdout'):
"""
Display string as cell output.
Expand All @@ -64,23 +56,16 @@ def standard_reply(self):
'user_expressions': {},
}

def comm_files_cleanup(self):
def switch_serializer(self, code):
"""
Clean up files used for transmitting persistence and running subprocess.
Switch serializer backend used for persistence in kernel.
"""
for aux_file in [scorep_script_name, jupyter_dump, subprocess_dump]:
if os.path.exists(aux_file):
os.remove(aux_file)

def set_scorep_env(self, code):
"""
Read and record Score-P environment variables from the cell.
"""
for scorep_param in code.split('\n')[1:]:
key, val = scorep_param.split('=')
self.scorep_env[key] = val
self.cell_output(
'Score-P environment set successfully: ' + str(self.scorep_env))
serializer = code.split('\n')[1]
if serializer == 'dill':
self.pershelper = PersHelper('dill')
elif serializer == 'cloudpickle':
self.pershelper = PersHelper('cloudpickle')
self.cell_output(f'Serializer backend switched to {serializer}, persistence was reset.')
return self.standard_reply()

def set_scorep_pythonargs(self, code):
Expand Down Expand Up @@ -116,8 +101,13 @@ def append_multicellmode(self, code):
"""
Append cell to multicell mode sequence.
"""
self.multicell_code += ("\n" + code)
self.multicellmode_cellcount += 1
max_line_len = max(len(line) for line in code.split('\n'))
self.multicell_code += f"print('Executing cell {self.multicellmode_cellcount}')\n" + \
f"print('''{code}''')\n" + \
f"print('-' * {max_line_len})\n" + \
f"{code}\n" + \
f"print('''\n''')\n"
self.cell_output(
f'Cell marked for multicell mode. It will be executed at position {self.multicellmode_cellcount}')
return self.standard_reply()
Expand Down Expand Up @@ -160,7 +150,7 @@ def end_writefile(self):
# TODO: check for os path existence
self.writemode = False
self.bash_script.write(
f"{' '.join(self.writemode_scorep_env)} {PYTHON_EXECUTABLE} -m scorep {' '.join(self.writemode_scorep_binding_args)} {self.python_script_filename}")
f"{PYTHON_EXECUTABLE} -m scorep {' '.join(self.writemode_scorep_binding_args)} {self.python_script_filename}")

self.bash_script.close()
self.python_script.close()
Expand All @@ -171,10 +161,7 @@ def append_writefile(self, code):
"""
Append cell to write mode sequence. Extract Score-P environment or Python bindings argument if necessary.
"""
if code.startswith('%%scorep_env'):
self.writemode_scorep_env += code.split('\n')[1:]
self.cell_output('Environment variables recorded.')
elif code.startswith('%%scorep_python_binding_arguments'):
if code.startswith('%%scorep_python_binding_arguments'):
self.writemode_scorep_binding_args += code.split('\n')[1:]
self.cell_output('Score-P bindings arguments recorded.')

Expand Down Expand Up @@ -214,43 +201,29 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression
"""
# Ghost cell - dump current Jupyter session for subprocess
# Run in a "silent" way to not increase cells counter
dump_jupyter = "import dill\n" + f"dill.dump_session('{jupyter_dump}')"
reply_status_dump = await super().do_execute(dump_jupyter, silent, store_history=False,
reply_status_dump = await super().do_execute(self.pershelper.jupyter_dump(), silent, store_history=False,
user_expressions=user_expressions, allow_stdin=allow_stdin, cell_id=cell_id)

if reply_status_dump['status'] != 'ok':
self.shell.execution_count += 1
reply_status_dump['execution_count'] = self.shell.execution_count - 1
self.comm_files_cleanup()
self.pershelper.pers_cleanup()
self.cell_output("KernelError: Failed to pickle previous notebook's persistence and variables.",
'stderr')
return reply_status_dump

# Prepare code for the Score-P instrumented execution as subprocess
# Transmit user persistence and updated sys.path from Jupyter notebook to subprocess
# After running code, transmit subprocess persistence back to Jupyter notebook
sys_path_updated = json.dumps(sys.path)
scorep_code = "import scorep\n" + \
"with scorep.instrumenter.disable():\n" + \
f" from {userpersistence_token} import save_variables_values \n" + \
" import dill\n" + \
f" globals().update(dill.load_module_asdict('{jupyter_dump}'))\n" + \
" import sys\n" + \
" sys.path.clear()\n" + \
f" sys.path.extend({sys_path_updated})\n" + \
code + "\n" + \
"with scorep.instrumenter.disable():\n" + \
f" save_variables_values(globals(), {str(self.user_variables)}, '{subprocess_dump}')"

with open(scorep_script_name, 'w+') as file:
file.write(scorep_code)
file.write(self.pershelper.subprocess_wrapper(code))

# Launch subprocess with Jupyter notebook environment
cmd = [PYTHON_EXECUTABLE, "-m", "scorep"] + \
self.scorep_binding_args + [scorep_script_name]

proc_env = os.environ.copy()
proc_env.update(self.scorep_env)
proc_env.update({'PYTHONUNBUFFERED': 'x'}) # subprocess observation
proc_env.update({'PYTHONUNBUFFERED': 'x'}) # scorep path, subprocess observation

incomplete_line = ''
endline_pattern = re.compile(r'(.*?[\r\n]|.+$)')
Expand Down Expand Up @@ -283,31 +256,28 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression
proc.wait()

if proc.returncode:
self.comm_files_cleanup()
self.pershelper.pers_cleanup()
self.cell_output(
'KernelError: Cell execution failed, cell persistence and variables are not recorded.',
'stderr')
return self.standard_reply()

# Ghost cell - load subprocess definitions and persistence back to Jupyter notebook
# Run in a "silent" way to not increase cells counter
load_jupyter = extract_definitions(code) + "\n" + \
f"with open('{subprocess_dump}', 'rb') as file:\n" + \
" globals().update(dill.load(file))\n"
reply_status_load = await super().do_execute(load_jupyter, silent, store_history=False,
reply_status_update = await super().do_execute(self.pershelper.jupyter_update(code), silent, store_history=False,
user_expressions=user_expressions, allow_stdin=allow_stdin, cell_id=cell_id)

if reply_status_load['status'] != 'ok':
if reply_status_update['status'] != 'ok':
self.shell.execution_count += 1
reply_status_load['execution_count'] = self.shell.execution_count - 1
self.comm_files_cleanup()
reply_status_update['execution_count'] = self.shell.execution_count - 1
self.pershelper.pers_cleanup()
self.cell_output("KernelError: Failed to load cell's persistence and variables to the notebook.",
'stderr')
return reply_status_load
return reply_status_update

self.comm_files_cleanup()
if 'SCOREP_EXPERIMENT_DIRECTORY' in self.scorep_env:
scorep_folder = self.scorep_env['SCOREP_EXPERIMENT_DIRECTORY']
self.pershelper.pers_cleanup()
if 'SCOREP_EXPERIMENT_DIRECTORY' in os.environ:
scorep_folder = os.environ['SCOREP_EXPERIMENT_DIRECTORY']
self.cell_output(
f"Instrumentation results can be found in {scorep_folder}")
else:
Expand Down Expand Up @@ -353,6 +323,7 @@ async def do_execute(self, code, silent, store_history=False, user_expressions=N
try:
reply_status = await self.scorep_execute(self.multicell_code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)
except:
self.cell_output("KernelError: Multicell mode failed.",'stderr')
return self.standard_reply()
self.multicell_code = ""
self.multicellmode_cellcount = 0
Expand All @@ -369,44 +340,17 @@ async def do_execute(self, code, silent, store_history=False, user_expressions=N
elif code.startswith('%%enable_multicellmode'):
return self.enable_multicellmode()

elif code.startswith('%%scorep_env'):
return self.set_scorep_env(code)

elif code.startswith('%%switch_serializer'):
return self.switch_serializer(code)
elif code.startswith('%%scorep_python_binding_arguments'):
return self.set_scorep_pythonargs(code)
elif self.multicellmode:
return self.append_multicellmode(code)
elif code.startswith('%%execute_with_scorep'):
code = code.split("\n", 1)[1]
# Parsing for user variables might fail due to SyntaxError
try:
user_variables = extract_variables_names(code)
except SyntaxError as e:
self.cell_output(f"SyntaxError: {e}", 'stderr')
return self.standard_reply()
self.user_variables.update(user_variables)
return await self.scorep_execute(code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)
return await self.scorep_execute(code.split("\n", 1)[1], silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)
else:
# Some line/cell magics involve executing Python code, which must be parsed
# TODO: timeit, python, ...? do not save variables to globals()
whitelist_prefixes_cell = ['%%prun', '%%timeit', '%%capture', '%%python', '%%pypy']
whitelist_prefixes_line = ['%prun', '%time']

nomagic_code = '' # Code to be parsed for user variables
if not code.startswith(tuple(['%', '!'])): # No IPython magics and shell commands
nomagic_code = code
else:
if code.startswith(tuple(whitelist_prefixes_cell)): # Cell magic, remove first line
nomagic_code = code.split("\n", 1)[1]
elif code.startswith(tuple(whitelist_prefixes_line)): # Line magic, remove first word
nomagic_code = code.split(" ", 1)[1]
if nomagic_code:
# Parsing for user variables might fail due to SyntaxError
try:
user_variables = extract_variables_names(nomagic_code)
except SyntaxError as e:
self.cell_output(f"SyntaxError: {e}", 'stderr')
return self.standard_reply()
self.user_variables.update(user_variables)
self.pershelper.parse(code, 'jupyter')
return await super().do_execute(code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)


Expand Down
Loading