score-p · MandaloreUltimate · Dec 29, 2023 · Dec 28, 2023 · Feb 1, 2024 · Jan 27, 2024
diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
@@ -39,7 +39,7 @@ jobs:
           pip install --upgrade setuptools
           pip install scorep
           pip install jupyter_kernel_test
-          pip install pyyaml dill numpy pandas
+          pip install pyyaml dill cloudpickle numpy pandas
 
       - name: Build Score-P Python kernel
         run: |

diff --git a/README.md b/README.md
@@ -52,9 +52,9 @@ You can select the kernel in Jupyter as `scorep-python`.
 
 ## Configuring Score-P in Jupyter
 
-`%%scorep_env`
+Set up your Score-P environment with the `%env` line magic.
 
-Set up your Score-P environment. For a documentation of Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html).
+For documentation of the Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html).
 
 ![](doc/scorep_setup.png)
 
@@ -127,7 +127,7 @@ Enables the write mode and starts the marking process. Subsequently, "running" c
 Stops the marking process and writes the marked cells in a Python script. Additionally, a bash script will be created for setting the Score-P environment variables, Pyhton bindings arguments and executing the Python script.
 
 **Hints**:
-- Recording a cell containing `%%scorep_env` or `%%scorep_python_binding_arguments` will add the environment variables/Score-P Python bindings to the bash script.
+- Recording a cell containing `%%scorep_python_binding_arguments` will add the Score-P Python bindings parameters to the bash script.
 
 - Code of a cell which is not to be executed with Score-P (not inside the multicell mode and without `%%execute_with_scorep`) will be framed with `with scorep.instrumenter.disable()` in the Python script to prevent instrumentation.
 

diff --git a/doc/scorep_setup.png b/doc/scorep_setup.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta:__legacy__"
 
 [project]
 name='scorep-jupyter'
-version='0.4.0'
+version='0.6.0'
 authors=[
   {name='Elias Werner',email='[email protected]'},
 ]
@@ -24,7 +24,8 @@ dependencies = [
   "jupyter-client",
   "astunparse",
   "scorep",
-  "dill"
+  "dill",
+  "cloudpickle"
 ]
 
 [project.urls]

diff --git a/src/scorep_jupyter/kernel.py b/src/scorep_jupyter/kernel.py
@@ -3,16 +3,10 @@
 import os
 import subprocess
 import re
-import json
-from scorep_jupyter.userpersistence import extract_definitions, extract_variables_names
+from scorep_jupyter.userpersistence import PersHelper, scorep_script_name
 
 PYTHON_EXECUTABLE = sys.executable
 READ_CHUNK_SIZE = 8
-userpersistence_token = "scorep_jupyter.userpersistence"
-scorep_script_name = "scorep_script.py"
-jupyter_dump = "jupyter_dump.pkl"
-subprocess_dump = "subprocess_dump.pkl"
-
 
 class ScorepPythonKernel(IPythonKernel):
     implementation = 'Python and Score-P'
@@ -30,9 +24,6 @@ def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
         self.scorep_binding_args = []
-        self.scorep_env = {}
-
-        self.user_variables = set()
 
         self.multicellmode = False
         self.multicellmode_cellcount = 0
@@ -42,13 +33,14 @@ def __init__(self, **kwargs):
         self.writemode_filename = 'jupyter_to_script'
         self.writemode_multicell = False
         self.writemode_scorep_binding_args = []
-        self.writemode_scorep_env = []
         # TODO: reset variables after each finalize writefile?
         self.bash_script_filename = ""
         self.python_script_filename = ""
         self.bash_script = None
         self.python_script = None
 
+        self.pershelper = PersHelper('dill')
+
     def cell_output(self, string, stream='stdout'):
         """
         Display string as cell output.
@@ -64,23 +56,16 @@ def standard_reply(self):
                 'user_expressions': {},
                 }
 
-    def comm_files_cleanup(self):
+    def switch_serializer(self, code):
         """
-        Clean up files used for transmitting persistence and running subprocess.
+        Switch the persistence serializer to the backend named on the cell's second line ('dill' or 'cloudpickle').
         """
-        for aux_file in [scorep_script_name, jupyter_dump, subprocess_dump]:
-            if os.path.exists(aux_file):
-                os.remove(aux_file)
-
-    def set_scorep_env(self, code):
-        """
-        Read and record Score-P environment variables from the cell.
-        """
-        for scorep_param in code.split('\n')[1:]:
-            key, val = scorep_param.split('=')
-            self.scorep_env[key] = val
-        self.cell_output(
-            'Score-P environment set successfully: ' + str(self.scorep_env))
+        serializer = (code.split('\n') + [''])[1].strip()  # '' when the magic has no second line
+        if serializer not in ('dill', 'cloudpickle'):  # reject instead of falsely reporting a switch
+            self.cell_output(f"KernelError: Unknown serializer '{serializer}', use 'dill' or 'cloudpickle'.", 'stderr')
+            return self.standard_reply()
+        self.pershelper = PersHelper(serializer)  # new helper instance, hence the 'persistence was reset' message
+        self.cell_output(f'Serializer backend switched to {serializer}, persistence was reset.')
         return self.standard_reply()
 
     def set_scorep_pythonargs(self, code):
@@ -116,8 +101,13 @@ def append_multicellmode(self, code):
         """
         Append cell to multicell mode sequence.
         """
-        self.multicell_code += ("\n" + code)
         self.multicellmode_cellcount += 1
+        max_line_len = max(len(line) for line in code.split('\n'))
+        self.multicell_code += f"print('Executing cell {self.multicellmode_cellcount}')\n" + \
+                               f"print('''{code}''')\n" + \
+                               f"print('-' * {max_line_len})\n" + \
+                               f"{code}\n" + \
+                               f"print('''\n''')\n"
         self.cell_output(
             f'Cell marked for multicell mode. It will be executed at position {self.multicellmode_cellcount}')
         return self.standard_reply()
@@ -160,7 +150,7 @@ def end_writefile(self):
         # TODO: check for os path existence
         self.writemode = False
         self.bash_script.write(
-            f"{' '.join(self.writemode_scorep_env)} {PYTHON_EXECUTABLE} -m scorep {' '.join(self.writemode_scorep_binding_args)} {self.python_script_filename}")
+            f"{PYTHON_EXECUTABLE} -m scorep {' '.join(self.writemode_scorep_binding_args)} {self.python_script_filename}")
 
         self.bash_script.close()
         self.python_script.close()
@@ -171,10 +161,7 @@ def append_writefile(self, code):
         """
         Append cell to write mode sequence. Extract Score-P environment or Python bindings argument if necessary.
         """
-        if code.startswith('%%scorep_env'):
-            self.writemode_scorep_env += code.split('\n')[1:]
-            self.cell_output('Environment variables recorded.')
-        elif code.startswith('%%scorep_python_binding_arguments'):
+        if code.startswith('%%scorep_python_binding_arguments'):
             self.writemode_scorep_binding_args += code.split('\n')[1:]
             self.cell_output('Score-P bindings arguments recorded.')
 
@@ -214,43 +201,29 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression
         """
         # Ghost cell - dump current Jupyter session for subprocess
         # Run in a "silent" way to not increase cells counter
-        dump_jupyter = "import dill\n" + f"dill.dump_session('{jupyter_dump}')"
-        reply_status_dump = await super().do_execute(dump_jupyter, silent, store_history=False,
+        reply_status_dump = await super().do_execute(self.pershelper.jupyter_dump(), silent, store_history=False,
                                                      user_expressions=user_expressions, allow_stdin=allow_stdin, cell_id=cell_id)
 
         if reply_status_dump['status'] != 'ok':
             self.shell.execution_count += 1
             reply_status_dump['execution_count'] = self.shell.execution_count - 1
-            self.comm_files_cleanup()
+            self.pershelper.pers_cleanup()
             self.cell_output("KernelError: Failed to pickle previous notebook's persistence and variables.",
                              'stderr')
             return reply_status_dump
 
         # Prepare code for the Score-P instrumented execution as subprocess
         # Transmit user persistence and updated sys.path from Jupyter notebook to subprocess
         # After running code, transmit subprocess persistence back to Jupyter notebook
-        sys_path_updated = json.dumps(sys.path)
-        scorep_code = "import scorep\n" + \
-                      "with scorep.instrumenter.disable():\n" + \
-                     f"    from {userpersistence_token} import save_variables_values \n" + \
-                      "    import dill\n" + \
-                     f"    globals().update(dill.load_module_asdict('{jupyter_dump}'))\n" + \
-                      "    import sys\n" + \
-                      "    sys.path.clear()\n" + \
-                     f"    sys.path.extend({sys_path_updated})\n" + \
-                      code + "\n" + \
-                      "with scorep.instrumenter.disable():\n" + \
-                     f"   save_variables_values(globals(), {str(self.user_variables)}, '{subprocess_dump}')"
-
         with open(scorep_script_name, 'w+') as file:
-            file.write(scorep_code)
+            file.write(self.pershelper.subprocess_wrapper(code))
 
         # Launch subprocess with Jupyter notebook environment
         cmd = [PYTHON_EXECUTABLE, "-m", "scorep"] + \
             self.scorep_binding_args + [scorep_script_name]
+
         proc_env = os.environ.copy()
-        proc_env.update(self.scorep_env)
-        proc_env.update({'PYTHONUNBUFFERED': 'x'}) # subprocess observation
+        proc_env.update({'PYTHONUNBUFFERED': 'x'}) # scorep path, subprocess observation
 
         incomplete_line = ''
         endline_pattern = re.compile(r'(.*?[\r\n]|.+$)')
@@ -283,31 +256,28 @@ async def scorep_execute(self, code, silent, store_history=True, user_expression
             proc.wait()
 
         if proc.returncode:
-            self.comm_files_cleanup()
+            self.pershelper.pers_cleanup()
             self.cell_output(
                 'KernelError: Cell execution failed, cell persistence and variables are not recorded.',
                 'stderr')
             return self.standard_reply()
 
         # Ghost cell - load subprocess definitions and persistence back to Jupyter notebook
         # Run in a "silent" way to not increase cells counter
-        load_jupyter = extract_definitions(code) + "\n" + \
-                        f"with open('{subprocess_dump}', 'rb') as file:\n" + \
-                         "    globals().update(dill.load(file))\n"
-        reply_status_load = await super().do_execute(load_jupyter, silent, store_history=False,
+        reply_status_update = await super().do_execute(self.pershelper.jupyter_update(code), silent, store_history=False,
                                                      user_expressions=user_expressions, allow_stdin=allow_stdin, cell_id=cell_id)
 
-        if reply_status_load['status'] != 'ok':
+        if reply_status_update['status'] != 'ok':
             self.shell.execution_count += 1
-            reply_status_load['execution_count'] = self.shell.execution_count - 1
-            self.comm_files_cleanup()
+            reply_status_update['execution_count'] = self.shell.execution_count - 1
+            self.pershelper.pers_cleanup()
             self.cell_output("KernelError: Failed to load cell's persistence and variables to the notebook.",
                              'stderr')
-            return reply_status_load
+            return reply_status_update
 
-        self.comm_files_cleanup()
-        if 'SCOREP_EXPERIMENT_DIRECTORY' in self.scorep_env:
-            scorep_folder = self.scorep_env['SCOREP_EXPERIMENT_DIRECTORY']
+        self.pershelper.pers_cleanup()
+        if 'SCOREP_EXPERIMENT_DIRECTORY' in os.environ:
+            scorep_folder = os.environ['SCOREP_EXPERIMENT_DIRECTORY']
             self.cell_output(
                 f"Instrumentation results can be found in {scorep_folder}")
         else:
@@ -353,6 +323,7 @@ async def do_execute(self, code, silent, store_history=False, user_expressions=N
             try:
                 reply_status = await self.scorep_execute(self.multicell_code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)
             except:
+                self.cell_output("KernelError: Multicell mode failed.",'stderr')
                 return self.standard_reply()
             self.multicell_code = ""
             self.multicellmode_cellcount = 0
@@ -369,44 +340,17 @@ async def do_execute(self, code, silent, store_history=False, user_expressions=N
         elif code.startswith('%%enable_multicellmode'):
             return self.enable_multicellmode()
 
-        elif code.startswith('%%scorep_env'):
-            return self.set_scorep_env(code)
+
+        elif code.startswith('%%switch_serializer'):
+            return self.switch_serializer(code)
         elif code.startswith('%%scorep_python_binding_arguments'):
             return self.set_scorep_pythonargs(code)
         elif self.multicellmode:
             return self.append_multicellmode(code)
         elif code.startswith('%%execute_with_scorep'):
-            code = code.split("\n", 1)[1]
-            # Parsing for user variables might fail due to SyntaxError
-            try:
-                user_variables = extract_variables_names(code)
-            except SyntaxError as e:
-                self.cell_output(f"SyntaxError: {e}", 'stderr')
-                return self.standard_reply()
-            self.user_variables.update(user_variables)
-            return await self.scorep_execute(code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)
+            return await self.scorep_execute(code.split("\n", 1)[1], silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)
         else:
-            # Some line/cell magics involve executing Python code, which must be parsed
-            # TODO: timeit, python, ...? do not save variables to globals()
-            whitelist_prefixes_cell = ['%%prun', '%%timeit', '%%capture', '%%python', '%%pypy']
-            whitelist_prefixes_line = ['%prun', '%time']
-
-            nomagic_code = '' # Code to be parsed for user variables
-            if not code.startswith(tuple(['%', '!'])): # No IPython magics and shell commands
-                nomagic_code = code
-            else:
-                if code.startswith(tuple(whitelist_prefixes_cell)): # Cell magic, remove first line
-                    nomagic_code = code.split("\n", 1)[1]
-                elif code.startswith(tuple(whitelist_prefixes_line)): # Line magic, remove first word
-                    nomagic_code = code.split(" ", 1)[1]
-            if nomagic_code:
-                # Parsing for user variables might fail due to SyntaxError
-                try:
-                    user_variables = extract_variables_names(nomagic_code)
-                except SyntaxError as e:
-                    self.cell_output(f"SyntaxError: {e}", 'stderr')
-                    return self.standard_reply()
-                self.user_variables.update(user_variables)
+            self.pershelper.parse(code, 'jupyter')
             return await super().do_execute(code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id)