Parsl · benclifford · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024
diff --git a/.github/workflows/ci-k8s.yaml b/.github/workflows/ci-k8s.yaml
@@ -0,0 +1,55 @@
+name: Parsl
+
+# Runs a pytest Job inside a local Kubernetes (kind) cluster for each
+# pull request when it is opened or updated.
+on:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+
+jobs:
+  k8s-kind-suite:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 60
+
+    steps:
+    # Pin to a released major version instead of a moving branch, so that
+    # upstream changes to the action cannot silently alter this workflow.
+    - uses: actions/checkout@v4
+
+    - name: Create k8s Kind Cluster
+      uses: helm/kind-action@v1
+      with:
+        # kind tooling uses this name by default, but kind-action uses
+        # a different default name
+        cluster_name: kind
+
+    # Build the image from the Dockerfile at the repository root; it is
+    # tagged locally and not pushed to any registry.
+    - name: Build docker image
+      uses: docker/build-push-action@v5
+      with:
+        context: .
+        tags: parsl:ci
+
+    - name: Push docker image into kubernetes cluster
+      run: |
+        kind load docker-image parsl:ci
+
+    # Grants cluster-admin to every service account in the cluster. This is
+    # only acceptable because the cluster is a throwaway CI cluster.
+    - name: set liberal permissions
+      run: |
+        kubectl create clusterrolebinding serviceaccounts-cluster-admin   --clusterrole=cluster-admin   --group=system:serviceaccounts
+
+    - name: launch pytest Job
+      run: |
+        free -h
+        kubectl create -f ./pytest-task.yaml
+
+    - name: wait for pytest Job
+      run: |
+        # this pytest should take around 30 seconds to run, so 180 seconds
+        # should be plenty...
+        kubectl wait --timeout=180s --for=condition=Complete Job pytest
+
+    # always() makes this step run even when an earlier step failed, so the
+    # pod/job state and the pytest log are available for debugging.
+    - name: report some info
+      if: ${{ always() }}
+      run: |
+        free -h
+        kubectl describe pods
+        kubectl describe jobs
+        kubectl logs Job/pytest
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,35 @@
+FROM debian:trixie
+
+RUN apt-get update && apt-get upgrade -y
+
+RUN apt-get update && apt-get install -y sudo openssh-server
+
+RUN apt-get update && apt-get install -y curl less vim
+
+# git is needed for parsl to figure out its own repo-specific
+# version string
+RUN apt-get update && apt-get install -y git
+
+# useful stuff to have around
+RUN apt-get update && apt-get install -y procps
+
+# for building documentation
+RUN apt-get update && apt-get install -y pandoc
+
+# for monitoring visualization
+RUN apt-get update && apt-get install -y graphviz wget
+
+# for commandline access to monitoring database
+RUN apt-get update && apt-get install -y sqlite3
+
+# Python version to install and to build the virtualenv with; override
+# with `--build-arg PYTHON_VERSION=...`
+ARG PYTHON_VERSION="3.12"
+RUN apt-get update && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev
+RUN apt-get update && apt-get install -y python${PYTHON_VERSION}-venv
+
+RUN apt-get update && apt-get install -y gcc build-essential make pkg-config mpich
+
+# use the same interpreter that was installed above, so that changing
+# PYTHON_VERSION does not leave the venv pointing at a missing python
+RUN python${PYTHON_VERSION} -m venv /venv
+
+ADD . /src
+WORKDIR /src
+
+RUN . /venv/bin/activate && pip3 install '.[kubernetes]' -r test-requirements.txt
diff --git a/htex_k8s_kind.py b/htex_k8s_kind.py
@@ -0,0 +1,26 @@
+from parsl.channels import LocalChannel
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+from parsl.launchers import SimpleLauncher
+from parsl.providers import KubernetesProvider
+
+
+def fresh_config():
+    return Config(
+        executors=[
+            HighThroughputExecutor(
+                label="executorname",
+                storage_access=[],
+                worker_debug=True,
+                cores_per_worker=1,
+                encrypted=False,  # needs certificate fs to be mounted in same place...
+                provider=KubernetesProvider(
+                    worker_init=". /venv/bin/activate",
+                    # pod_name="override-pod-name", # can't use default name because of dots, without own bugfix
+                    image="parsl:ci",
+                    max_mem="2048Gi"  # was getting OOM-killing of workers with default... maybe this will help.
+                    ),
+            )
+        ],
+        strategy='none',
+    )
diff --git a/parsl/providers/kubernetes/kube.py b/parsl/providers/kubernetes/kube.py
@@ -1,4 +1,5 @@
 import logging
+import re
 import time
 
 from parsl.providers.kubernetes.template import template_string
@@ -168,10 +169,9 @@ def submit(self, cmd_string, tasks_per_node, job_name="parsl"):
              - tasks_per_node (int) : command invocations to be launched per node
 
         Kwargs:
-             - job_name (String): Name for job, must be unique
+             - job_name (String): Name for job
 
         Returns:
-             - None: At capacity, cannot provision more
              - job_id: (string) Identifier for the job
         """
 
@@ -184,10 +184,12 @@ def submit(self, cmd_string, tasks_per_node, job_name="parsl"):
             pod_name = '{}-{}'.format(self.pod_name,
                                       cur_timestamp)
 
+        pod_name = _sanitizeDNS1123(pod_name)
+
         formatted_cmd = template_string.format(command=cmd_string,
                                                worker_init=self.worker_init)
 
-        logger.debug("Pod name :{}".format(pod_name))
+        logger.debug("Pod name: {}".format(pod_name))
         self._create_pod(image=self.image,
                          pod_name=pod_name,
                          job_name=job_name,
@@ -350,3 +352,25 @@ def label(self):
     @property
     def status_polling_interval(self):
         return 60
+
+
+# this is based on:
+# https://github.com/kubernetes/apimachinery/blob/703232ea6da48aed7ac22260dabc6eac01aab896/pkg/util/validation/validation.go#L177C32-L177C62
+DNS_LABEL_REGEXP = "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$"
+
+
+def _sanitizeDNS1123(raw: str) -> str:
+    """Rewrite input string to be a valid RFC1123 DNS label.
+    This is required for Kubernetes pod names.
+    """
+
+    # label must be lowercase
+    raw = raw.lower()
+
+    # label can only contain [-a-z0-9] characters - replace everything
+    # else with -
+    raw = re.sub("[^-a-z0-9]", "-", raw)
+
+    # TODO: sanitize against first and last symbols (no - at start or end?)
+    assert re.match(DNS_LABEL_REGEXP, raw), "sanitized DNS1123 label has not been properly sanitized: " + raw
+    return raw
diff --git a/parsl/tests/test_bash_apps/test_basic.py b/parsl/tests/test_bash_apps/test_basic.py
@@ -24,6 +24,7 @@ def foo(x, y, z=10, stdout=None, label=None):
     return f"echo {x} {y} {z}"
 
 
+@pytest.mark.shared_fs
 def test_command_format_1(tmpd_cwd):
     """Testing command format for BashApps"""
 
@@ -38,6 +39,7 @@ def test_command_format_1(tmpd_cwd):
     assert so_content == "1 4 10"
 
 
+@pytest.mark.shared_fs
 def test_auto_log_filename_format(caplog):
     """Testing auto log filename format for BashApps
     """
@@ -66,6 +68,7 @@ def test_auto_log_filename_format(caplog):
         assert record.levelno < logging.ERROR
 
 
+@pytest.mark.shared_fs
 def test_parallel_for(tmpd_cwd, n=3):
     """Testing a simple parallel for loop"""
     outdir = tmpd_cwd / "outputs/test_parallel"

diff --git a/parsl/tests/test_bash_apps/test_error_codes.py b/parsl/tests/test_bash_apps/test_error_codes.py
@@ -58,6 +58,7 @@ def bad_format(stderr='std.err', stdout='std.out'):
 whitelist = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'configs', '*threads*')
 
 
+@pytest.mark.shared_fs
 def test_div_0(test_fn=div_0):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()
@@ -73,6 +74,7 @@ def test_div_0(test_fn=div_0):
     os.remove('std.out')
 
 
+@pytest.mark.shared_fs
 def test_bash_misuse(test_fn=bash_misuse):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()
@@ -87,6 +89,7 @@ def test_bash_misuse(test_fn=bash_misuse):
     os.remove('std.out')
 
 
+@pytest.mark.shared_fs
 def test_command_not_found(test_fn=command_not_found):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()
@@ -103,6 +106,7 @@ def test_command_not_found(test_fn=command_not_found):
     return True
 
 
+@pytest.mark.shared_fs
 def test_not_executable(test_fn=not_executable):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()

diff --git a/parsl/tests/test_bash_apps/test_kwarg_storage.py b/parsl/tests/test_bash_apps/test_kwarg_storage.py
@@ -8,6 +8,7 @@ def foo(z=2, stdout=None):
     return f"echo {z}"
 
 
+@pytest.mark.shared_fs
 def test_command_format_1(tmpd_cwd):
     """Testing command format for BashApps
     """

diff --git a/parsl/tests/test_bash_apps/test_memoize.py b/parsl/tests/test_bash_apps/test_memoize.py
@@ -9,9 +9,7 @@ def fail_on_presence(outputs=()):
     return 'if [ -f {0} ] ; then exit 1 ; else touch {0}; fi'.format(outputs[0])
 
 
-# This test is an oddity that requires a shared-FS and simply
-# won't work if there's a staging provider.
-# @pytest.mark.sharedFS_required
+@pytest.mark.shared_fs
 def test_bash_memoization(tmpd_cwd, n=2):
     """Testing bash memoization
     """
@@ -29,9 +27,7 @@ def fail_on_presence_kw(outputs=(), foo=None):
     return 'if [ -f {0} ] ; then exit 1 ; else touch {0}; fi'.format(outputs[0])
 
 
-# This test is an oddity that requires a shared-FS and simply
-# won't work if there's a staging provider.
-# @pytest.mark.sharedFS_required
+@pytest.mark.shared_fs
 def test_bash_memoization_keywords(tmpd_cwd, n=2):
     """Testing bash memoization
     """

diff --git a/parsl/tests/test_bash_apps/test_memoize_ignore_args.py b/parsl/tests/test_bash_apps/test_memoize_ignore_args.py
@@ -23,6 +23,7 @@ def no_checkpoint_stdout_app_ignore_args(stdout=None):
     return "echo X"
 
 
+@pytest.mark.shared_fs
 def test_memo_stdout():
 
     # this should run and create a file named after path_x

diff --git a/parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py b/parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py
@@ -30,6 +30,7 @@ def no_checkpoint_stdout_app(stdout=None):
     return "echo X"
 
 
+@pytest.mark.shared_fs
 def test_memo_stdout():
 
     assert const_list_x == const_list_x_arg

diff --git a/parsl/tests/test_bash_apps/test_multiline.py b/parsl/tests/test_bash_apps/test_multiline.py
@@ -14,6 +14,7 @@ def multiline(inputs=(), outputs=(), stderr=None, stdout=None):
     """.format(inputs=inputs, outputs=outputs)
 
 
+@pytest.mark.shared_fs
 def test_multiline(tmpd_cwd):
     so, se = tmpd_cwd / "std.out", tmpd_cwd / "std.err"
     f = multiline(

diff --git a/parsl/tests/test_bash_apps/test_stdout.py b/parsl/tests/test_bash_apps/test_stdout.py
@@ -16,7 +16,7 @@ def echo_to_streams(msg, stderr=None, stdout=None):
 whitelist = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'configs', '*threads*')
 
 speclist = (
-    '/bad/dir/t.out',
+    # '/bad/dir/t.out',  - isn't bad if we're root - should be tagged issue3328 too...
     ['t3.out', 'w'],
     ('t4.out', None),
     (42, 'w'),
@@ -26,7 +26,7 @@ def echo_to_streams(msg, stderr=None, stdout=None):
 )
 
 testids = [
-    'nonexistent_dir',
+    # 'nonexistent_dir',  - goes with above /bad/dir/t.out
     'list_not_tuple',
     'null_mode',
     'not_a_string',
@@ -73,6 +73,7 @@ def test_bad_stderr_file():
 
 
 @pytest.mark.executor_supports_std_stream_tuples
+@pytest.mark.shared_fs
 def test_stdout_truncate(tmpd_cwd, caplog):
     """Testing truncation of prior content of stdout"""
 
@@ -92,6 +93,7 @@ def test_stdout_truncate(tmpd_cwd, caplog):
         assert record.levelno < logging.ERROR
 
 
+@pytest.mark.shared_fs
 def test_stdout_append(tmpd_cwd, caplog):
     """Testing appending to prior content of stdout (default open() mode)"""
 

diff --git a/parsl/tests/test_docs/test_from_slides.py b/parsl/tests/test_docs/test_from_slides.py
@@ -1,5 +1,7 @@
 import os
 
+import pytest
+
 from parsl.app.app import bash_app, python_app
 from parsl.data_provider.files import File
 
@@ -15,6 +17,7 @@ def cat(inputs=[]):
         return f.readlines()
 
 
+@pytest.mark.staging_required
 def test_slides():
     """Testing code snippet from slides """
 

diff --git a/parsl/tests/test_docs/test_kwargs.py b/parsl/tests/test_docs/test_kwargs.py
@@ -1,6 +1,8 @@
 """Functions used to explain kwargs"""
 from pathlib import Path
 
+import pytest
+
 from parsl import File, python_app
 
 
@@ -19,6 +21,7 @@ def reduce_app(inputs=()):
     assert reduce_future.result() == 6
 
 
+@pytest.mark.shared_fs
 def test_outputs(tmpd_cwd):
     @python_app()
     def write_app(message, outputs=()):

diff --git a/parsl/tests/test_python_apps/test_outputs.py b/parsl/tests/test_python_apps/test_outputs.py
@@ -16,6 +16,7 @@ def double(x, outputs=[]):
 whitelist = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'configs', '*threads*')
 
 
+@pytest.mark.shared_fs
 def test_launch_apps(tmpd_cwd, n=2):
     outdir = tmpd_cwd / "outputs"
     outdir.mkdir()

diff --git a/parsl/tests/test_regression/test_226.py b/parsl/tests/test_regression/test_226.py
@@ -53,6 +53,7 @@ def test_get_dataframe():
     assert res.equals(data), 'Unexpected dataframe'
 
 
+@pytest.mark.shared_fs
 def test_bash_default_arg():
     if os.path.exists('std.out'):
         os.remove('std.out')

diff --git a/parsl/tests/test_staging/test_docs_1.py b/parsl/tests/test_staging/test_docs_1.py
@@ -12,6 +12,7 @@ def convert(inputs=[], outputs=[]):
 
 
 @pytest.mark.cleannet
+@pytest.mark.staging_required
 def test():
     # create an remote Parsl file
     inp = File('ftp://ftp.iana.org/pub/mirror/rirstats/arin/ARIN-STATS-FORMAT-CHANGE.txt')

diff --git a/parsl/tests/test_staging/test_output_chain_filenames.py b/parsl/tests/test_staging/test_output_chain_filenames.py
@@ -1,5 +1,7 @@
 from concurrent.futures import Future
 
+import pytest
+
 from parsl import File
 from parsl.app.app import bash_app
 
@@ -14,6 +16,7 @@ def app2(inputs=(), outputs=(), stdout=None, stderr=None, mock=False):
     return f"echo '{inputs[0]}' > {outputs[0]}"
 
 
+@pytest.mark.shared_fs
 def test_behavior(tmpd_cwd):
     expected_path = str(tmpd_cwd / "simple-out.txt")
     app1_future = app1(