diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml index bd4c2f1..451c05c 100644 --- a/.github/workflows/unit_test.yml +++ b/.github/workflows/unit_test.yml @@ -39,7 +39,7 @@ jobs: pip install --upgrade setuptools pip install scorep pip install jupyter_kernel_test - pip install pyyaml dill numpy pandas + pip install pyyaml dill cloudpickle numpy pandas - name: Build Score-P Python kernel run: | diff --git a/README.md b/README.md index 4b11f3c..7437222 100644 --- a/README.md +++ b/README.md @@ -52,9 +52,9 @@ You can select the kernel in Jupyter as `scorep-python`. ## Configuring Score-P in Jupyter -`%%scorep_env` +Set up your Score-P environment with `%env` line magic. -Set up your Score-P environment. For a documentation of Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html). +For a documentation of Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html). ![](doc/scorep_setup.png) @@ -127,7 +127,7 @@ Enables the write mode and starts the marking process. Subsequently, "running" c Stops the marking process and writes the marked cells in a Python script. Additionally, a bash script will be created for setting the Score-P environment variables, Pyhton bindings arguments and executing the Python script. **Hints**: -- Recording a cell containing `%%scorep_env` or `%%scorep_python_binding_arguments` will add the environment variables/Score-P Python bindings to the bash script. +- Recording a cell containing `%%scorep_python_binding_arguments` will add the Score-P Python bindings parameters to the bash script. - Code of a cell which is not to be executed with Score-P (not inside the multicell mode and without `%%execute_with_scorep`) will be framed with `with scorep.instrumenter.disable()` in the Python script to prevent instrumentation. diff --git a/doc/scorep_setup.png b/doc/scorep_setup.png index 37efc8e..5e9388e 100644 Binary files a/doc/scorep_setup.png and b/doc/scorep_setup.png differ diff --git a/pyproject.toml b/pyproject.toml index 8bf8560..3b199bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta:__legacy__" [project] name='scorep-jupyter' -version='0.4.0' +version='0.6.0' authors=[ {name='Elias Werner',email='elias.werner@tu-dresden.de'}, ] @@ -24,7 +24,8 @@ dependencies = [ "jupyter-client", "astunparse", "scorep", - "dill" + "dill", + "cloudpickle" ] [project.urls] diff --git a/src/scorep_jupyter/kernel.py b/src/scorep_jupyter/kernel.py index 87062c0..d9c36e4 100755 --- a/src/scorep_jupyter/kernel.py +++ b/src/scorep_jupyter/kernel.py @@ -3,16 +3,10 @@ import os import subprocess import re -import json -from scorep_jupyter.userpersistence import extract_definitions, extract_variables_names +from scorep_jupyter.userpersistence import PersHelper, scorep_script_name PYTHON_EXECUTABLE = sys.executable READ_CHUNK_SIZE = 8 -userpersistence_token = "scorep_jupyter.userpersistence" -scorep_script_name = "scorep_script.py" -jupyter_dump = "jupyter_dump.pkl" -subprocess_dump = "subprocess_dump.pkl" - class ScorepPythonKernel(IPythonKernel): implementation = 'Python and Score-P' @@ -30,9 +24,6 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.scorep_binding_args = [] - self.scorep_env = {} - - self.user_variables = set() self.multicellmode = False self.multicellmode_cellcount = 0 @@ -42,13 +33,14 @@ def __init__(self, **kwargs): self.writemode_filename = 'jupyter_to_script' self.writemode_multicell = False self.writemode_scorep_binding_args = [] - self.writemode_scorep_env = [] # TODO: reset variables after each finalize writefile? self.bash_script_filename = "" self.python_script_filename = "" self.bash_script = None self.python_script = None + self.pershelper = PersHelper('dill') + def cell_output(self, string, stream='stdout'): """ Display string as cell output. @@ -64,23 +56,16 @@ def standard_reply(self): 'user_expressions': {}, } - def comm_files_cleanup(self): + def switch_serializer(self, code): """ - Clean up files used for transmitting persistence and running subprocess. + Switch serializer backend used for persistence in kernel. """ - for aux_file in [scorep_script_name, jupyter_dump, subprocess_dump]: - if os.path.exists(aux_file): - os.remove(aux_file) - - def set_scorep_env(self, code): - """ - Read and record Score-P environment variables from the cell. - """ - for scorep_param in code.split('\n')[1:]: - key, val = scorep_param.split('=') - self.scorep_env[key] = val - self.cell_output( - 'Score-P environment set successfully: ' + str(self.scorep_env)) + serializer = code.split('\n')[1] + if serializer == 'dill': + self.pershelper = PersHelper('dill') + elif serializer == 'cloudpickle': + self.pershelper = PersHelper('cloudpickle') + self.cell_output(f'Serializer backend switched to {serializer}, persistence was reset.') return self.standard_reply() def set_scorep_pythonargs(self, code): @@ -116,8 +101,13 @@ def append_multicellmode(self, code): """ Append cell to multicell mode sequence. """ - self.multicell_code += ("\n" + code) self.multicellmode_cellcount += 1 + max_line_len = max(len(line) for line in code.split('\n')) + self.multicell_code += f"print('Executing cell {self.multicellmode_cellcount}')\n" + \ + f"print('''{code}''')\n" + \ + f"print('-' * {max_line_len})\n" + \ + f"{code}\n" + \ + f"print('''\n''')\n" self.cell_output( f'Cell marked for multicell mode. It will be executed at position {self.multicellmode_cellcount}') return self.standard_reply() @@ -160,7 +150,7 @@ def end_writefile(self): # TODO: check for os path existence self.writemode = False self.bash_script.write( - f"{' '.join(self.writemode_scorep_env)} {PYTHON_EXECUTABLE} -m scorep {' '.join(self.writemode_scorep_binding_args)} {self.python_script_filename}") + f"{PYTHON_EXECUTABLE} -m scorep {' '.join(self.writemode_scorep_binding_args)} {self.python_script_filename}") self.bash_script.close() self.python_script.close() @@ -171,10 +161,7 @@ def append_writefile(self, code): """ Append cell to write mode sequence. Extract Score-P environment or Python bindings argument if necessary. """ - if code.startswith('%%scorep_env'): - self.writemode_scorep_env += code.split('\n')[1:] - self.cell_output('Environment variables recorded.') - elif code.startswith('%%scorep_python_binding_arguments'): + if code.startswith('%%scorep_python_binding_arguments'): self.writemode_scorep_binding_args += code.split('\n')[1:] self.cell_output('Score-P bindings arguments recorded.') @@ -214,14 +201,13 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression """ # Ghost cell - dump current Jupyter session for subprocess # Run in a "silent" way to not increase cells counter - dump_jupyter = "import dill\n" + f"dill.dump_session('{jupyter_dump}')" - reply_status_dump = await super().do_execute(dump_jupyter, silent, store_history=False, + reply_status_dump = await super().do_execute(self.pershelper.jupyter_dump(), silent, store_history=False, user_expressions=user_expressions, allow_stdin=allow_stdin, cell_id=cell_id) if reply_status_dump['status'] != 'ok': self.shell.execution_count += 1 reply_status_dump['execution_count'] = self.shell.execution_count - 1 - self.comm_files_cleanup() + self.pershelper.pers_cleanup() self.cell_output("KernelError: Failed to pickle previous notebook's persistence and variables.", 'stderr') return reply_status_dump @@ -229,28 +215,15 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression # Prepare code for the Score-P instrumented execution as subprocess # Transmit user persistence and updated sys.path from Jupyter notebook to subprocess # After running code, transmit subprocess persistence back to Jupyter notebook - sys_path_updated = json.dumps(sys.path) - scorep_code = "import scorep\n" + \ - "with scorep.instrumenter.disable():\n" + \ - f" from {userpersistence_token} import save_variables_values \n" + \ - " import dill\n" + \ - f" globals().update(dill.load_module_asdict('{jupyter_dump}'))\n" + \ - " import sys\n" + \ - " sys.path.clear()\n" + \ - f" sys.path.extend({sys_path_updated})\n" + \ - code + "\n" + \ - "with scorep.instrumenter.disable():\n" + \ - f" save_variables_values(globals(), {str(self.user_variables)}, '{subprocess_dump}')" - with open(scorep_script_name, 'w+') as file: - file.write(scorep_code) + file.write(self.pershelper.subprocess_wrapper(code)) # Launch subprocess with Jupyter notebook environment cmd = [PYTHON_EXECUTABLE, "-m", "scorep"] + \ self.scorep_binding_args + [scorep_script_name] + proc_env = os.environ.copy() - proc_env.update(self.scorep_env) - proc_env.update({'PYTHONUNBUFFERED': 'x'}) # subprocess observation + proc_env.update({'PYTHONUNBUFFERED': 'x'}) # scorep path, subprocess observation incomplete_line = '' endline_pattern = re.compile(r'(.*?[\r\n]|.+$)') @@ -283,7 +256,7 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression proc.wait() if proc.returncode: - self.comm_files_cleanup() + self.pershelper.pers_cleanup() self.cell_output( 'KernelError: Cell execution failed, cell persistence and variables are not recorded.', 'stderr') @@ -291,23 +264,20 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression # Ghost cell - load subprocess definitions and persistence back to Jupyter notebook # Run in a "silent" way to not increase cells counter - load_jupyter = extract_definitions(code) + "\n" + \ - f"with open('{subprocess_dump}', 'rb') as file:\n" + \ - " globals().update(dill.load(file))\n" - reply_status_load = await super().do_execute(load_jupyter, silent, store_history=False, + reply_status_update = await super().do_execute(self.pershelper.jupyter_update(code), silent, store_history=False, user_expressions=user_expressions, allow_stdin=allow_stdin, cell_id=cell_id) - if reply_status_load['status'] != 'ok': + if reply_status_update['status'] != 'ok': self.shell.execution_count += 1 - reply_status_load['execution_count'] = self.shell.execution_count - 1 - self.comm_files_cleanup() + reply_status_update['execution_count'] = self.shell.execution_count - 1 + self.pershelper.pers_cleanup() self.cell_output("KernelError: Failed to load cell's persistence and variables to the notebook.", 'stderr') - return reply_status_load + return reply_status_update - self.comm_files_cleanup() - if 'SCOREP_EXPERIMENT_DIRECTORY' in self.scorep_env: - scorep_folder = self.scorep_env['SCOREP_EXPERIMENT_DIRECTORY'] + self.pershelper.pers_cleanup() + if 'SCOREP_EXPERIMENT_DIRECTORY' in os.environ: + scorep_folder = os.environ['SCOREP_EXPERIMENT_DIRECTORY'] self.cell_output( f"Instrumentation results can be found in {scorep_folder}") else: @@ -353,6 +323,7 @@ async def do_execute(self, code, silent, store_history=False, user_expressions=N try: reply_status = await self.scorep_execute(self.multicell_code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id) except: + self.cell_output("KernelError: Multicell mode failed.",'stderr') return self.standard_reply() self.multicell_code = "" self.multicellmode_cellcount = 0 @@ -369,44 +340,17 @@ async def do_execute(self, code, silent, store_history=False, user_expressions=N elif code.startswith('%%enable_multicellmode'): return self.enable_multicellmode() - elif code.startswith('%%scorep_env'): - return self.set_scorep_env(code) + + elif code.startswith('%%switch_serializer'): + return self.switch_serializer(code) elif code.startswith('%%scorep_python_binding_arguments'): return self.set_scorep_pythonargs(code) elif self.multicellmode: return self.append_multicellmode(code) elif code.startswith('%%execute_with_scorep'): - code = code.split("\n", 1)[1] - # Parsing for user variables might fail due to SyntaxError - try: - user_variables = extract_variables_names(code) - except SyntaxError as e: - self.cell_output(f"SyntaxError: {e}", 'stderr') - return self.standard_reply() - self.user_variables.update(user_variables) - return await self.scorep_execute(code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id) + return await self.scorep_execute(code.split("\n", 1)[1], silent, store_history, user_expressions, allow_stdin, cell_id=cell_id) else: - # Some line/cell magics involve executing Python code, which must be parsed - # TODO: timeit, python, ...? do not save variables to globals() - whitelist_prefixes_cell = ['%%prun', '%%timeit', '%%capture', '%%python', '%%pypy'] - whitelist_prefixes_line = ['%prun', '%time'] - - nomagic_code = '' # Code to be parsed for user variables - if not code.startswith(tuple(['%', '!'])): # No IPython magics and shell commands - nomagic_code = code - else: - if code.startswith(tuple(whitelist_prefixes_cell)): # Cell magic, remove first line - nomagic_code = code.split("\n", 1)[1] - elif code.startswith(tuple(whitelist_prefixes_line)): # Line magic, remove first word - nomagic_code = code.split(" ", 1)[1] - if nomagic_code: - # Parsing for user variables might fail due to SyntaxError - try: - user_variables = extract_variables_names(nomagic_code) - except SyntaxError as e: - self.cell_output(f"SyntaxError: {e}", 'stderr') - return self.standard_reply() - self.user_variables.update(user_variables) + self.pershelper.parse(code, 'jupyter') return await super().do_execute(code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id) diff --git a/src/scorep_jupyter/userpersistence.py b/src/scorep_jupyter/userpersistence.py index b0422fa..3fabdaa 100644 --- a/src/scorep_jupyter/userpersistence.py +++ b/src/scorep_jupyter/userpersistence.py @@ -1,26 +1,210 @@ +import os +import shutil import ast import astunparse +from textwrap import dedent +from pathlib import Path -def save_variables_values(globs, variables, filename): - """ - Dump values of given variables into the file. - """ - import dill - user_variables = {k: v for k, v in globs.items() if k in variables} +scorep_script_name = "scorep_script.py" +jupyter_dump_dir = "jupyter_dump/" +subprocess_dump_dir = "subprocess_dump/" +main_dump = "main_dump.pkl" +os_env_dump = "os_env_dump.pkl" +sys_path_dump = "sys_path_dump.pkl" +var_dump = "var_dump.pkl" + +class PersHelper: + def __init__(self, serializer='dill'): + self.jupyter_definitions = "" + self.jupyter_variables = [] + self.serializer = serializer + self.subprocess_definitions = "" + self.subprocess_variables = [] + os.environ['SCOREP_KERNEL_PERSISTENCE_DIR'] = './' + + def get_full_jupyter_dump_dir(self): + """ + Get the full path for jupyer dump + """ + return str(Path(os.environ['SCOREP_KERNEL_PERSISTENCE_DIR']) / Path(jupyter_dump_dir)) + + def get_full_subprocess_dump_dir(self): + """ + Get the full path for subprocess dump + """ + return str(Path(os.environ['SCOREP_KERNEL_PERSISTENCE_DIR']) / Path(subprocess_dump_dir)) + + # FIXME + def pers_cleanup(self): + """ + Clean up files used for transmitting persistence and running subprocess. + """ + full_jupyter_dump_dir = self.get_full_jupyter_dump_dir() + full_subprocess_dump_dir = self.get_full_subprocess_dump_dir() + for pers_path in [scorep_script_name, + *[dirname + filename for dirname in [full_jupyter_dump_dir, full_subprocess_dump_dir] + for filename in [main_dump, os_env_dump, sys_path_dump, var_dump]]]: + if os.path.exists(pers_path): + if os.path.isfile(pers_path): + os.remove(pers_path) + elif os.path.isdir(pers_path): + shutil.rmtree(pers_path) + + def jupyter_dump(self): + """ + Generate code for kernel ghost cell to dump notebook persistence for subprocess. + """ + full_jupyter_dump_dir = self.get_full_jupyter_dump_dir() + if not os.path.exists(full_jupyter_dump_dir): + os.makedirs(full_jupyter_dump_dir) + + jupyter_dump_ = dedent(f"""\ + import sys + import os + import {self.serializer} + from scorep_jupyter.userpersistence import pickle_runtime, pickle_variables + pickle_runtime(os.environ, sys.path, '{full_jupyter_dump_dir}', {self.serializer}) + """) + if self.serializer == 'dill': + return jupyter_dump_ + f"dill.dump_session('{full_jupyter_dump_dir + main_dump}')" + elif self.serializer == 'cloudpickle': + return jupyter_dump_ + f"pickle_variables({str(self.jupyter_variables)}, globals(), '{full_jupyter_dump_dir}', {self.serializer})" + + def subprocess_wrapper(self, code): + """ + Extract subprocess user variables and definitions. + """ + self.parse(code, 'subprocess') + + full_jupyter_dump_dir = self.get_full_jupyter_dump_dir() + full_subprocess_dump_dir = self.get_full_subprocess_dump_dir() + if not os.path.exists(full_subprocess_dump_dir): + os.makedirs(full_subprocess_dump_dir) + subprocess_update = dedent(f"""\ + import sys + import os + import {self.serializer} + from scorep_jupyter.userpersistence import pickle_runtime, pickle_variables, load_runtime, load_variables + load_runtime(os.environ, sys.path, '{full_jupyter_dump_dir}', {self.serializer}) + """) + if self.serializer == 'dill': + subprocess_update += f"globals().update(dill.load_module_asdict('{full_jupyter_dump_dir + main_dump}'))" + elif self.serializer == 'cloudpickle': + subprocess_update += (self.jupyter_definitions + f"load_variables(globals(), '{full_jupyter_dump_dir}', {self.serializer})") + return subprocess_update + "\n" + code + \ + dedent(f""" + pickle_runtime(os.environ, sys.path, '{full_subprocess_dump_dir}', {self.serializer}) + pickle_variables({str(self.subprocess_variables)}, globals(), '{full_subprocess_dump_dir}', {self.serializer}) + """) + + def jupyter_update(self, code): + """ + Update aggregated storage of definitions and user variables for entire notebook. + """ + self.parse(code, 'jupyter') + + full_subprocess_dump_dir = self.get_full_subprocess_dump_dir() + return dedent(f"""\ + import sys + import os + from scorep_jupyter.userpersistence import load_runtime, load_variables + load_runtime(os.environ, sys.path, '{full_subprocess_dump_dir}', {self.serializer}) + {self.subprocess_definitions} + load_variables(globals(), '{full_subprocess_dump_dir}', {self.serializer}) + """) + + def parse(self, code, mode): + """ + Extract user variables names and definitions from the code. + """ + # Code with magics and shell commands is ignored, + # unless magics are from "white list" which execute code + # in "persistent" manner. + whitelist_prefixes_cell = ['%%prun', '%%capture'] + whitelist_prefixes_line = ['%prun', '%time'] + + nomagic_code = '' # Code to be parsed for user variables + if not code.startswith(tuple(['%', '!'])): # No IPython magics and shell commands + nomagic_code = code + elif code.startswith(tuple(whitelist_prefixes_cell)): # Cell magic & executed cell, remove first line + nomagic_code = code.split("\n", 1)[1] + elif code.startswith(tuple(whitelist_prefixes_line)): # Line magic & executed cell, remove first word + nomagic_code = code.split(" ", 1)[1] + try: + user_definitions = extract_definitions(nomagic_code) + user_variables = extract_variables_names(nomagic_code) + except SyntaxError as e: + raise + + if mode == 'subprocess': + # Parse definitions and user variables from subprocess code before running it. + self.subprocess_definitions = "" + self.subprocess_variables.clear() + self.subprocess_definitions += user_definitions + self.subprocess_variables.extend(user_variables) + elif mode == "jupyter" and self.serializer == "cloudpickle": + # Update aggregated storage of definitions and user variables for entire notebook. + # Not relevant for dill because of dump_session. + self.jupyter_definitions += user_definitions + self.jupyter_variables.extend(user_variables) + +def pickle_runtime(os_environ_, sys_path_, dump_dir, serializer): + os_env_dump_ = dump_dir + os_env_dump + sys_path_dump_ = dump_dir + sys_path_dump + + # Don't dump environment variables set by Score-P bindings. + # Will force it to re-initialize instead of calling reset_preload() + filtered_os_environ_ = {k: v for k, v in os_environ_.items() if not k.startswith('SCOREP_PYTHON_BINDINGS_')} + with open(os_env_dump_, 'wb+') as file: + serializer.dump(filtered_os_environ_, file) + with open(sys_path_dump_, 'wb+') as file: + serializer.dump(sys_path_, file) + +def pickle_variables(variables_names, globals_, dump_dir, serializer): + var_dump_ = dump_dir + var_dump + user_variables = {k: v for k, v in globals_.items() if k in variables_names} + for el in user_variables.keys(): # if possible, exchange class of the object here with the class that is stored for persistence. This is # valid since the classes should be the same and this does not affect the objects attribute dictionary non_persistent_class = user_variables[el].__class__.__name__ if non_persistent_class in globals().keys(): user_variables[el].__class__ = globals()[non_persistent_class] - with open(filename, 'wb+') as file: - dill.dump(user_variables, file) + + with open(var_dump_, 'wb+') as file: + serializer.dump(user_variables, file) + +def load_runtime(os_environ_, sys_path_, dump_dir, serializer): + os_env_dump_ = dump_dir + os_env_dump + sys_path_dump_ = dump_dir + sys_path_dump + + loaded_os_environ_ = {} + loaded_sys_path_ = [] + + if os.path.getsize(os_env_dump_) > 0: + with open(os_env_dump_, 'rb') as file: + loaded_os_environ_ = serializer.load(file) + if os.path.getsize(sys_path_dump_) > 0: + with open(sys_path_dump_, 'rb') as file: + loaded_sys_path_ = serializer.load(file) + + #os_environ_.clear() + os_environ_.update(loaded_os_environ_) + + #sys_path_.clear() + sys_path_.extend(loaded_sys_path_) + +def load_variables(globals_, dump_dir, serializer): + var_dump_ = dump_dir + var_dump + if os.path.getsize(var_dump_) > 0: + with open(var_dump_, 'rb') as file: + globals_.update(serializer.load(file)) def extract_definitions(code): """ Extract imported modules and definitions of classes and functions from the code block. """ - # can't use in kernel as import from userpersistence: + # can't use in kernel as import from scorep_jupyter.userpersistence: # self-reference error during dill dump of notebook root = ast.parse(code) definitions = [] @@ -36,17 +220,17 @@ def extract_definitions(code): ast.ImportFrom)): definitions.append(top_node) - pers_string = "" + definitions_string = "" for node in definitions: - pers_string += astunparse.unparse(node) + definitions_string += astunparse.unparse(node) - return pers_string + return definitions_string def extract_variables_names(code): """ Extract user-assigned variables from code. Unlike dir(), nothing coming from the imported modules is included. - Might contain non-variables as well from assignments, which are later filtered out in save_variables_values. + Might contain non-variables as well from assignments, which are later filtered out when dumping variables. """ root = ast.parse(code) diff --git a/tests/kernel/ipykernel_exec.yaml b/tests/kernel/ipykernel_exec.yaml index dbc6b37..6dcda3d 100644 --- a/tests/kernel/ipykernel_exec.yaml +++ b/tests/kernel/ipykernel_exec.yaml @@ -10,6 +10,5 @@ b = 10 - - "" - - - |- - print("a + b =", a + b) + - "print('a + b =', a + b)" - - "a + b = 15\n" \ No newline at end of file diff --git a/tests/kernel/multicell.yaml b/tests/kernel/multicell.yaml index c728000..3178fec 100644 --- a/tests/kernel/multicell.yaml +++ b/tests/kernel/multicell.yaml @@ -14,16 +14,29 @@ - |- with scorep.instrumenter.enable(): c = np.sum(c_mtx) - c_vec = np.arange(b, c) + c_vec = np.arange(b, c) - - "Cell marked for multicell mode. It will be executed at position 1" - - |- - print("c =", c) - print("Sum(c_vec) =", c_vec.sum()) + print('c =', c) + print('Sum(c_vec) =', c_vec.sum()) - - "Cell marked for multicell mode. It will be executed at position 2" - - "%%finalize_multicellmode" - - "\0" + - "Executing cell 1\n" + - "with scorep.instrumenter.enable():\n" + - " c = np.sum(c_mtx)\n" + - "c_vec = np.arange(b, c)\n" + - "----------------------------------\n" + - "\n" + - "\n" + - "Executing cell 2\n" + - "print('c =', c)\n" + - "print('Sum(c_vec) =', c_vec.sum())\n" + - "----------------------------------\n" - "c = 350\n" - "Sum(c_vec) = 61030\n" + - "\n" + - "\n" - "Instrumentation results can be found in tests_tmp/scorep-traces" diff --git a/tests/kernel/notebook.ipynb b/tests/kernel/notebook.ipynb index f8a7dd0..f1fa621 100644 --- a/tests/kernel/notebook.ipynb +++ b/tests/kernel/notebook.ipynb @@ -20,6 +20,15 @@ "SCOREP_EXPERIMENT_DIRECTORY=tests_tmp/scorep-traces" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env SCOREP_KERNEL_PERSISTENCE_DIR=tests_tmp/" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -71,7 +80,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"a + b =\", a + b)" + "print('a + b =', a + b)" ] }, { @@ -90,23 +99,7 @@ "%%execute_with_scorep\n", "import scorep\n", "with scorep.instrumenter.enable():\n", - " print(\"a - b =\", a - b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "comm_files=(\"tests_tmp/scorep_script.py\" \"tests_tmp/jupyter_dump.pkl\" \"tests_tmp/subprocess_dump.pkl\")\n", - "\n", - "for file in \"${comm_files[@]}\"; do\n", - " if [ -e \"$file\" ]; then\n", - " echo \"Error: $file not cleaned up.\"\n", - " fi\n", - "done" + " print('a - b =', a - b)" ] }, { @@ -132,6 +125,15 @@ "### persistence" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env JUPYTER_VAR=JUPYTER" + ] + }, { "cell_type": "code", "execution_count": null, @@ -142,7 +144,10 @@ "def f(x):\n", " return x**2\n", "a_vec = np.arange(a)\n", - "b_vec = np.arange(a, b)" + "b_vec = np.arange(a, b)\n", + "\n", + "import sys\n", + "sys.path.append('/new/jupyter/path')" ] }, { @@ -152,13 +157,22 @@ "outputs": [], "source": [ "%%execute_with_scorep\n", + "import pandas as pd\n", + "def g(x):\n", + " return np.log2(x)\n", "with scorep.instrumenter.enable():\n", - " import pandas as pd\n", - " def g(x):\n", - " return np.log2(x)\n", " c_mtx = np.outer(a_vec, b_vec)\n", - " print(\"Inner product of a_vec and b_vec =\", np.dot(a_vec, b_vec))\n", - " print(\"f(4) =\", f(4))" + "print('Inner product of a_vec and b_vec =', np.dot(a_vec, b_vec))\n", + "print('f(4) =', f(4))\n", + "\n", + "import os\n", + "import sys\n", + "print('JUPYTER_VAR =', os.environ['JUPYTER_VAR'])\n", + "if '/new/jupyter/path' in sys.path:\n", + " print(\"'/new/jupyter/path' found in sys.path\")\n", + "\n", + "os.environ['SUBPROCESS_VAR'] = 'SUBPROCESS'\n", + "sys.path.append('/new/subprocess/path')" ] }, { @@ -167,8 +181,8 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"Outer product of a_vec and b_vec =\\n\", c_mtx)\n", - "print(\"g(16) =\", g(16))" + "print('Outer product of a_vec and b_vec =\\n', c_mtx)\n", + "print('g(16) =', g(16))" ] }, { @@ -182,6 +196,25 @@ "print(df['a*b'])" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env SUBPROCESS_VAR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if '/new/subprocess/path' in sys.path:\n", + " print(\"'/new/subprocess/path' found in sys.path\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -233,7 +266,7 @@ "source": [ "with scorep.instrumenter.enable():\n", " c = np.sum(c_mtx)\n", - " c_vec = np.arange(b, c)" + "c_vec = np.arange(b, c)" ] }, { @@ -242,8 +275,8 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"c =\", c)\n", - "print(\"Sum(c_vec) =\", c_vec.sum())" + "print('c =', c)\n", + "print('Sum(c_vec) =', c_vec.sum())" ] }, { @@ -305,7 +338,7 @@ "b = 10\n", "a_vec = np.arange(a)\n", "b_vec = np.arange(a, b)\n", - "print(\"a + b =\", a + b)" + "print('a + b =', a + b)" ] }, { @@ -316,8 +349,8 @@ "source": [ "%%execute_with_scorep\n", "import scorep\n", + "print('a - b =', a - b)\n", "with scorep.instrumenter.enable():\n", - " print(\"a - b =\", a - b)\n", " c_mtx = np.outer(a_vec, b_vec)" ] }, @@ -347,7 +380,7 @@ "source": [ "with scorep.instrumenter.enable():\n", " c = np.sum(c_mtx)\n", - " c_vec = np.arange(b, c)" + "c_vec = np.arange(b, c)" ] }, { @@ -356,8 +389,8 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"c =\", c)\n", - "print(\"Sum(c_vec) =\", c_vec.sum())" + "print('c =', c)\n", + "print('Sum(c_vec) =', c_vec.sum())" ] }, { @@ -391,8 +424,14 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.11.6" } }, "nbformat": 4, diff --git a/tests/kernel/persistence.yaml b/tests/kernel/persistence.yaml index 0ab8ff3..7cb9a63 100644 --- a/tests/kernel/persistence.yaml +++ b/tests/kernel/persistence.yaml @@ -1,29 +1,48 @@ +- + - |- + import os + os.environ['JUPYTER_VAR'] = 'JUPYTER' + - - "" - - |- import numpy as np def f(x): return x**2 a_vec = np.arange(a) - b_vec = np.arange(a, b) + b_vec = np.arange(a, b) + + import sys + sys.path.append('/new/jupyter/path') - - "" - - |- %%execute_with_scorep + import pandas as pd + def g(x): + return np.log2(x) with scorep.instrumenter.enable(): - import pandas as pd - def g(x): - return np.log2(x) c_mtx = np.outer(a_vec, b_vec) - print("Inner product of a_vec and b_vec =", np.dot(a_vec, b_vec)) - print("f(4) =", f(4)) + print('Inner product of a_vec and b_vec =', np.dot(a_vec, b_vec)) + print('f(4) =', f(4)) + + import os + import sys + print('JUPYTER_VAR =', os.environ['JUPYTER_VAR']) + if '/new/jupyter/path' in sys.path: + print("'/new/jupyter/path' found in sys.path") + + os.environ['SUBPROCESS_VAR'] = 'SUBPROCESS' + sys.path.append('/new/subprocess/path') - - "\0" - "Inner product of a_vec and b_vec = 80\n" - "f(4) = 16\n" + - "JUPYTER_VAR = JUPYTER\n" + - "'/new/jupyter/path' found in sys.path\n" - "Instrumentation results can be found in tests_tmp/scorep-traces" - - |- - print("Outer product of a_vec and b_vec =\n", c_mtx) - print("g(16) =", g(16)) + print('Outer product of a_vec and b_vec =\n', c_mtx) + print('g(16) =', g(16)) - - | Outer product of a_vec and b_vec = [[ 0 0 0 0 0] @@ -44,4 +63,13 @@ 3 24 4 36 Name: a*b, dtype: int64 +- + - "print('SUBPROCESS_VAR =', os.environ['SUBPROCESS_VAR'])" + - - "SUBPROCESS_VAR = SUBPROCESS\n" +- + - |- + if '/new/subprocess/path' in sys.path: + print("'/new/subprocess/path' found in sys.path") + - - "'/new/subprocess/path' found in sys.path\n" + \ No newline at end of file diff --git a/tests/kernel/scorep_env.yaml b/tests/kernel/scorep_env.yaml index b34a98e..b47279e 100644 --- a/tests/kernel/scorep_env.yaml +++ b/tests/kernel/scorep_env.yaml @@ -1,9 +1,11 @@ - - |- - %%scorep_env - SCOREP_ENABLE_TRACING=1 - SCOREP_ENABLE_PROFILING=0 - SCOREP_TOTAL_MEMORY=3g - SCOREP_EXPERIMENT_DIRECTORY=tests_tmp/scorep-traces - - - "Score-P environment set successfully: {'SCOREP_ENABLE_TRACING': '1', 'SCOREP_ENABLE_PROFILING': '0', - 'SCOREP_TOTAL_MEMORY': '3g', 'SCOREP_EXPERIMENT_DIRECTORY': 'tests_tmp/scorep-traces'}" \ No newline at end of file + %env SCOREP_ENABLE_TRACING=1 + %env SCOREP_ENABLE_PROFILING=0 + %env SCOREP_TOTAL_MEMORY=3g + %env SCOREP_EXPERIMENT_DIRECTORY=tests_tmp/scorep-traces + - - | + env: SCOREP_ENABLE_TRACING=1 + env: SCOREP_ENABLE_PROFILING=0 + env: SCOREP_TOTAL_MEMORY=3g + env: SCOREP_EXPERIMENT_DIRECTORY=tests_tmp/scorep-traces diff --git a/tests/kernel/scorep_exec.yaml b/tests/kernel/scorep_exec.yaml index 6f03caa..16fe8df 100644 --- a/tests/kernel/scorep_exec.yaml +++ b/tests/kernel/scorep_exec.yaml @@ -3,21 +3,10 @@ %%execute_with_scorep import scorep with scorep.instrumenter.enable(): - print("a - b =", a - b) + print('a - b =', a - b) - - "\0" - "a - b = -5\n" - "Instrumentation results can be found in tests_tmp/scorep-traces" -- - - |- - %%bash - comm_files=("tests_tmp/scorep_script.py" "tests_tmp/jupyter_dump.pkl" "tests_tmp/subprocess_dump.pkl") - - for file in "${comm_files[@]}"; do - if [ -e "$file" ]; then - echo "Error: $file not cleaned up." - fi - done - - - "" - - |- %%bash diff --git a/tests/kernel/writemode.yaml b/tests/kernel/writemode.yaml index 9fddbb7..fd452b5 100644 --- a/tests/kernel/writemode.yaml +++ b/tests/kernel/writemode.yaml @@ -6,12 +6,12 @@ /home/runner/work/scorep_jupyter_kernel_python/scorep_jupyter_kernel_python/tests_tmp/my_jupyter_to_script.py - - |- - %%scorep_env - SCOREP_ENABLE_TRACING=1 - SCOREP_ENABLE_PROFILING=0 - SCOREP_TOTAL_MEMORY=3g - SCOREP_EXPERIMENT_DIRECTORY=tests_tmp/scorep-traces - - - "Environment variables recorded." + import os + os.environ['SCOREP_ENABLE_TRACING']="1" + os.environ['SCOREP_ENABLE_PROFILING']="0" + os.environ['SCOREP_TOTAL_MEMORY']="3g" + os.environ['SCOREP_EXPERIMENT_DIRECTORY']="tests_tmp/scorep-traces" + - - "Python commands without instrumentation recorded." - - |- %%scorep_python_binding_arguments @@ -24,14 +24,14 @@ b = 10 a_vec = np.arange(a) b_vec = np.arange(a, b) - print("a + b =", a + b) + print('a + b =', a + b) - - "Python commands without instrumentation recorded." - - |- %%execute_with_scorep import scorep + print('a - b =', a - b) with scorep.instrumenter.enable(): - print("a - b =", a - b) c_mtx = np.outer(a_vec, b_vec) - - "Python commands with instrumentation recorded." - @@ -44,12 +44,12 @@ - |- with scorep.instrumenter.enable(): c = np.sum(c_mtx) - c_vec = np.arange(b, c) + c_vec = np.arange(b, c) - - "Python commands with instrumentation recorded." - - |- - print("c =", c) - print("Sum(c_vec) =", c_vec.sum()) + print('c =', c) + print('Sum(c_vec) =', c_vec.sum()) - - "Python commands with instrumentation recorded." - - "%%finalize_multicellmode" @@ -60,7 +60,7 @@ - - |- %%bash - chmod u+x tests_tmp/my_jupyter_to_script_run.sh + chmod u+x ./tests_tmp/my_jupyter_to_script_run.sh ./tests_tmp/my_jupyter_to_script_run.sh - - "a + b = 15\n" - "a - b = -5\n" diff --git a/tests/test_userpersistence.py b/tests/test_userpersistence.py index b0ca7db..9aef91c 100644 --- a/tests/test_userpersistence.py +++ b/tests/test_userpersistence.py @@ -4,12 +4,13 @@ import json import subprocess import dill +import cloudpickle +from textwrap import dedent -from src.scorep_jupyter.userpersistence import extract_variables_names, extract_definitions +from src.scorep_jupyter.userpersistence import extract_variables_names, extract_definitions, load_variables, load_runtime PYTHON_EXECUTABLE = sys.executable -subprocess_dump = "tests_tmp/subprocess_dump.pkl" -userpersistence_token = "src.scorep_jupyter.userpersistence" +dump_dir = 'tests_tmp/' class UserPersistenceTests(unittest.TestCase): @@ -32,7 +33,7 @@ def test_00_extract_variables_names(self): variables = json.load(file) extracted_names = extract_variables_names(code) # Extracted names might contain extra non-variables from assignments - # Those are filtered out later in save_variables_values + # Those are filtered out later in pickle_values self.assertTrue(set(variables.keys()).issubset(extracted_names)) def test_01_extract_definitions(self): @@ -43,23 +44,61 @@ def test_01_extract_definitions(self): extracted_defs = extract_definitions(code) self.assertEqual(extracted_defs, expected_defs) - def test_02_save_variables_values(self): - with open("tests/userpersistence/code.py", "r") as file: - code = file.read() - with open("tests/userpersistence/variables.json", "r") as file: - variables = json.load(file) - code = f"from {userpersistence_token} import save_variables_values\n" + \ - code + "\n" + \ - f"save_variables_values(globals(), {str(list(variables.keys()))}, '{subprocess_dump}')" - cmd = [PYTHON_EXECUTABLE, "-c", code] - with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc: - proc.wait() - with open(subprocess_dump, 'rb') as file: - saved_values = dill.load(file) - # Easier to skip comparison of CustomClass object - saved_values.pop('obj') - variables.pop('obj') - self.assertEqual(saved_values, variables) + def test_02_pickle_load_runtime(self): + # clean sys.path and os.environ inside subprocess and fill with values from file + # load dump and compare with file + # merge with load runtime + for serializer, serializer_str in zip([dill, cloudpickle], ['dill', 'cloudpickle']): + with open("tests/userpersistence/os_environ.json", "r") as file: + expected_os_environ = json.load(file) + with open("tests/userpersistence/sys_path.json", "r") as file: + expected_sys_path = json.load(file) + code = dedent(f"""\ + from src.scorep_jupyter.userpersistence import pickle_runtime + import {serializer_str} + import os + import sys + os.environ.clear() + sys.path.clear() + os.environ.update({str(expected_os_environ)}) + sys.path.extend({str(expected_sys_path)}) + pickle_runtime(os.environ, sys.path, '{dump_dir}', {serializer_str}) + """) + cmd = [PYTHON_EXECUTABLE, "-c", code] + with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc: + proc.wait() + self.assertFalse(proc.returncode) + + pickled_os_environ = {} + pickled_sys_path = [] + load_runtime(pickled_os_environ, pickled_sys_path, dump_dir, serializer) + self.assertEqual(pickled_os_environ, expected_os_environ) + self.assertEqual(pickled_sys_path, expected_sys_path) + + def test_03_pickle_load_variables(self): + for serializer, serializer_str in zip([dill, cloudpickle], ['dill', 'cloudpickle']): + with open("tests/userpersistence/code.py", "r") as file: + code = file.read() + with open("tests/userpersistence/variables.json", "r") as file: + expected_variables = json.load(file) + variables_names = list(expected_variables.keys()) + + code = dedent(f"""\ + from src.scorep_jupyter.userpersistence import pickle_variables + import {serializer_str} + """) + code + \ + f"\npickle_variables({str(variables_names)}, globals(), '{dump_dir}', {serializer_str})" + cmd = [PYTHON_EXECUTABLE, "-c", code] + with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc: + proc.wait() + self.assertFalse(proc.returncode) + + pickled_variables = {} + load_variables(pickled_variables, dump_dir, serializer) + # Easier to skip comparison of CustomClass object + pickled_variables.pop('obj') + expected_variables.pop('obj') + self.assertEqual(pickled_variables, expected_variables) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tests/userpersistence/os_environ.json b/tests/userpersistence/os_environ.json new file mode 100644 index 0000000..2dfdf15 --- /dev/null +++ b/tests/userpersistence/os_environ.json @@ -0,0 +1 @@ +{"VAR_1": "VAR_1_VAL", "VAR_2": "VAR_2_VAL", "VAR_3": "VAR_3_VAL"} \ No newline at end of file diff --git a/tests/userpersistence/sys_path.json b/tests/userpersistence/sys_path.json new file mode 100644 index 0000000..dcf0141 --- /dev/null +++ b/tests/userpersistence/sys_path.json @@ -0,0 +1 @@ +["/new/path/1", "/new/path/2", "/new/path/3"] \ No newline at end of file