From fcd64faad5bbd38a5849e4bde44ed8f927c1b85d Mon Sep 17 00:00:00 2001 From: helenxie-bit Date: Tue, 17 Sep 2024 19:32:28 -0700 Subject: [PATCH] change work directory Signed-off-by: helenxie-bit --- .github/workflows/e2e-test-tune-api.yaml | 18 ---------------- .../workflows/template-e2e-test/action.yaml | 1 + .../scripts/gh-actions/run-e2e-tune-api.py | 21 ++++++++++++++++++- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/e2e-test-tune-api.yaml b/.github/workflows/e2e-test-tune-api.yaml index 68426f23bed..909d0022ce5 100644 --- a/.github/workflows/e2e-test-tune-api.yaml +++ b/.github/workflows/e2e-test-tune-api.yaml @@ -16,24 +16,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - - name: Move docker data directory - shell: bash - run: | - echo "Stopping docker service ..." - sudo systemctl stop docker - DOCKER_DEFAULT_ROOT_DIR=/var/lib/docker - DOCKER_ROOT_DIR=/mnt/docker - echo "Moving ${DOCKER_DEFAULT_ROOT_DIR} -> ${DOCKER_ROOT_DIR}" - sudo mv ${DOCKER_DEFAULT_ROOT_DIR} ${DOCKER_ROOT_DIR} - echo "Creating symlink ${DOCKER_DEFAULT_ROOT_DIR} -> ${DOCKER_ROOT_DIR}" - sudo ln -s ${DOCKER_ROOT_DIR} ${DOCKER_DEFAULT_ROOT_DIR} - echo "$(sudo ls -l ${DOCKER_DEFAULT_ROOT_DIR})" - echo "Starting docker service ..." - sudo systemctl daemon-reload - sudo systemctl start docker - echo "Docker service status:" - sudo systemctl --no-pager -l -o short status docker - name: Setup Test Env uses: ./.github/workflows/template-setup-e2e-test diff --git a/.github/workflows/template-e2e-test/action.yaml b/.github/workflows/template-e2e-test/action.yaml index 7c9598df04b..c4a8c8831e4 100644 --- a/.github/workflows/template-e2e-test/action.yaml +++ b/.github/workflows/template-e2e-test/action.yaml @@ -47,3 +47,4 @@ runs: else ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }} fi + working-directory: /mnt/docker diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py index 9e327ac6adf..39c6d683488 100644 --- a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py +++ b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py @@ -36,6 +36,25 @@ def get_experiment_pods_logs(katib_client: KatibClient, exp_name: str, exp_names namespace=exp_namespace, container="metrics-logger-and-collector" # Specify the desired container ) + logging.info(f"Logs for pod {pod.metadata.name}:\n{pod_logs}") + except Exception as e: + logging.error(f"Failed to get logs for pod {pod.metadata.name}: {str(e)}") + +def get_experiment_pods_logs_2(katib_client: KatibClient, exp_name: str, exp_namespace: str): + # List all the pods in the namespace + v1 = client.CoreV1Api() + pods = v1.list_namespaced_pod(namespace=exp_namespace) + + # Filter pods related to the specific Katib Experiment + for pod in pods.items: + if exp_name in pod.metadata.name: + logging.info(f"Fetching logs for pod: {pod.metadata.name}") + try: + # Specify the container name when retrieving logs + pod_logs = v1.read_namespaced_pod_log( + name=pod.metadata.name, + namespace=exp_namespace, + ) logging.info(f"Logs for pod {pod.metadata.name} (container: metrics-logger-and-collector):\n{pod_logs}") except Exception as e: logging.error(f"Failed to get logs for pod {pod.metadata.name}: {str(e)}") @@ -197,7 +216,7 @@ def run_e2e_experiment_create_by_tune_with_external_model( logging.info("---------------------------------------------------------------") logging.info(f"E2E is failed for Experiment created by tune: {exp_namespace}/{exp_name}-2") get_experiment_pods_logs(katib_client, f"{exp_name}-2", exp_namespace) - get_experiment_pods_logs(katib_client, "katib-controller", "kubeflow") + get_experiment_pods_logs_2(katib_client, "katib-controller", "kubeflow") raise e finally: # Delete the Experiment.