From e972a04b7d7d7a3001d7988fee3e91ba044e4b41 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 09:17:26 -0800 Subject: [PATCH 01/45] Add workflow for GitHub Actions --- .../gha_workflow_llama_stack_tests.yml | 339 ++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 .github/workflows/gha_workflow_llama_stack_tests.yml diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml new file mode 100644 index 0000000000..32b7d361e4 --- /dev/null +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -0,0 +1,339 @@ +name: "Run Llama-stack Tests" + +on: + pull_request_target: + types: ["opened"] + branches: + - 'main' + paths: + - 'llama_stack/**/*.py' + - 'tests/**/*.py' + + workflow_dispatch: + inputs: + runner: + description: 'GHA Runner Scale Set label to run workflow on.' + required: true + default: "llama-stack-gha-runner-gpu" + + branch: + description: "Branch to checkout" + required: true + default: "main" + + debug: + description: 'Run debugging steps?' + required: false + default: "true" + + sleep_time: + description: '[DEBUG] sleep time for debugging' + required: true + default: "0" + + model_ids: + description: 'Comma separated list of models to test' + required: true + default: "Llama3.2-3B-Instruct" + + provider_id: + description: 'ID of your provider' + required: true + default: "meta-reference" + + api_key: + description: 'Provider API key' + required: false + default: "---" + +env: + # Path to model checkpoints within EFS volume + MODELS_PATH: "/data/llama" + + # Path to directory to run tests from + TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests" + + # List of models that are to be tested + MODEL_IDS: "${{ inputs.model_ids || 'Llama3.2-3B-Instruct' }}" + + # ID used for each test's provider config + PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" + + # Defined dynamically when each test is run below + PROVIDER_CONFIG: "" + + # (Unused) API key that can be manually defined for workflow dispatch + API_KEY: "${{ inputs.api_key || '' }}" + + # Defines which directories in TESTS_PATH to exclude from the test loop + EXCLUDED_DIRS: "__pycache__" + + # Defines the output xml reports generated after a test is run + REPORTS_GEN: "" + +jobs: + execute_workflow: + name: Execute workload on Self-Hosted GPU k8s runner + permissions: + pull-requests: write + defaults: + run: + shell: bash # default shell to run all steps for a given job. + runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} + if: always() + steps: + + ############################## + #### INITIAL DEBUG CHECKS #### + ############################## + - name: "[DEBUG] Check content of the EFS mount" + id: debug_efs_volume + continue-on-error: true + if: inputs.debug == 'true' + run: | + echo "========= Content of the EFS mount =============" + ls -la ${{ env.MODELS_PATH }} + + - name: "Check if models exist in EFS volume" + id: check_if_models_exist + run: | + for model_id in ${MODEL_IDS//,/ }; do + model_path="${MODELS_PATH}/${model_id}" + if [ ! -d "${model_path}" ]; then + echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." + exit 1 + else + echo "Content of '${model_id}' model" + ls -la "${model_path}" + fi + done + + - name: "[DEBUG] Get runner container OS information" + id: debug_os_info + if: ${{ inputs.debug == 'true' }} + run: | + cat /etc/os-release + + ####################### + #### CODE CHECKOUT #### + ####################### + - name: "Checkout 'meta-llama/llama-stack' repository" + id: checkout_repo + uses: actions/checkout@v4 + with: + ref: ${{ inputs.branch }} + + - name: "[DEBUG] Content of the repository after checkout" + id: debug_content_after_checkout + if: ${{ inputs.debug == 'true' }} + run: | + ls -la ${GITHUB_WORKSPACE} + + ########################################################## + #### OPTIONAL SLEEP DEBUG #### + # # + # Use to "exec" into the test k8s POD and run tests # + # manually to identify what dependencies are being used. # + # # + ########################################################## + - name: "[DEBUG] sleep" + id: debug_sleep + if: ${{ inputs.debug == 'true' && inputs.sleep_time != '' }} + run: | + sleep ${{ inputs.sleep_time }} + + ############################ + #### UPDATE SYSTEM PATH #### + ############################ + - name: "[DEBUG] Update path: before" + id: path_update_before + if: ${{ inputs.debug == 'true' }} + run: | + echo "System path before update:" + echo "PATH=$PATH" + echo "GITHUB_PATH=$GITHUB_PATH" + + - name: "Update path: execute" + id: path_update_exec + run: | + # .local/bin is needed for certain libraries installed below to be recognized + # when calling their executable to install sub-dependencies + mkdir -p ${HOME}/.local/bin + echo "${HOME}/.local/bin" >> "$GITHUB_PATH" + + - name: "[DEBUG] Update path: after" + id: path_update_after + if: ${{ inputs.debug == 'true' }} + run: | + echo "System path after update:" + echo "PATH=$PATH" + echo "GITHUB_PATH=$GITHUB_PATH" + + ################################## + #### DEPENDENCY INSTALLATIONS #### + ################################## + - name: "Installing 'apt' required packages" + id: install_apt + run: | + echo "[STEP] Installing 'apt' required packages" + sudo apt update -y + sudo apt install -y python3 python3-pip npm wget + + - name: "Installing packages with 'curl'" + id: install_curl + run: | + curl -fsSL https://ollama.com/install.sh | sh + + - name: "Installing packages with 'wget'" + id: install_wget + run: | + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh + chmod +x Miniconda3-latest-Linux-x86_64.sh + ./Miniconda3-latest-Linux-x86_64.sh -b install -c pytorch -c nvidia faiss-gpu=1.9.0 + # Add miniconda3 bin to system path + echo "${HOME}/miniconda3/bin" >> "$GITHUB_PATH" + + - name: "Installing packages with 'npm'" + id: install_npm_generic + run: | + sudo npm install -g junit-merge + + - name: "Installing 'llama-stack' dependencies" + id: install_pip_generic + run: | + echo "[STEP] Installing 'llama-stack' models" + pip install -U pip setuptools + pip install -r requirements.txt + pip install -e . + pip install -U \ + torch torchvision \ + pytest pytest_asyncio \ + fairscale lm-format-enforcer \ + zmq chardet pypdf \ + pandas sentence_transformers together + conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0 + + - name: "Installing specific manual_dispatch dependencies" + id: manual_install_pip + if: github.event_name == 'workflow_dispatch' + run: | + echo "[STEP] Installing specific dependencies for manual dispatch workflows" + # N.A. + + ############################################################# + #### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH #### + ############################################################# + - name: "Manual - Run Tests: Loop" + id: manual_run_tests_loop + working-directory: "${{ github.workspace }}" + run: | + pattern="" + for dir in llama_stack/providers/tests/*; do + if [ -d "$dir" ]; then + dir_name=$(basename "$dir") + if [[ ! " $EXCLUDED_DIRS " =~ " $dir_name " ]]; then + for file in "$dir"/test_*.py; do + test_name=$(basename "$file") + new_file="result-${dir_name}-${test_name}.xml" + if PROVIDER_CONFIG=$TESTS_PATH/${dir_name}/provider_config_example.yaml \ + torchrun $(which pytest) -s ${TESTS_PATH}/${dir_name}/${test_name} \ + --tb=short --disable-warnings --junitxml="${{ github.workspace }}/${new_file}"; then + echo "Test passed: $test_name" + else + echo "Test failed: $test_name" + fi + pattern+="${new_file} " + done + fi + fi + done + echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV" + + - name: "Manual - Test Summary: Merge" + id: test_summary_merge + if: always() + working-directory: "${{ github.workspace }}" + run: | + echo "Merging the following test result files: ${REPORTS_GEN}" + # Defaults to merging them into 'merged-test-results.xml' + junit-merge ${{ env.REPORTS_GEN }} + + ############################################ + #### AUTOMATIC TESTING ON PULL REQUESTS #### + ############################################ + + #### Run tests #### + + - name: "PR - Run Tests" + id: pr_run_tests + working-directory: "${{ github.workspace }}" + if: github.event_name == 'pull_request_target' + run: | + echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}" + # (Optional) Add more tests here. + + # Merge test results with 'merged-test-results.xml' from above. + # junit-merge merged-test-results.xml + + #### Create test summary #### + + - name: "PR - Test Summary" + id: pr_test_summary_create + if: github.event_name == 'pull_request_target' + uses: test-summary/action@v2 + with: + paths: "${{ github.workspace }}/merged-test-results.xml" + output: test-summary.md + + - name: "PR - Upload Test Summary" + id: pr_test_summary_upload + if: github.event_name == 'pull_request_target' + uses: actions/upload-artifact@v3 + with: + name: test-summary + path: test-summary.md + + #### Update PR request #### + + - name: "PR - Update comment" + id: pr_update_comment + if: github.event_name == 'pull_request_target' + uses: thollander/actions-comment-pull-request@v2 + with: + filePath: test-summary.md + + ######################## + #### MANUAL TESTING #### + ######################## + + #### Run tests #### + + - name: "Manual - Run Tests: Prep" + id: manual_run_tests + working-directory: "${{ github.workspace }}" + if: github.event_name == 'workflow_dispatch' + run: | + echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}" + # (Optional) Add more tests here. + + # Merge test results with 'merged-test-results.xml' from above. + # junit-merge merged-test-results.xml + + #### Create test summary #### + + - name: "Manual - Test Summary" + id: manual_test_summary + if: always() && github.event_name == 'workflow_dispatch' + uses: test-summary/action@v2 + with: + paths: "${{ github.workspace }}/merged-test-results.xml" + + + - name: "Workplace Message - Group" + id: workplace_message_group + if: always() + uses: florianldt/workplace-action@master + with: + access-token: ${{ secrets.WORKPLACE_ACCESS_TOKEN }} + thread-key: ${{ secrets.WORKPLACE_THREAD_KEY }} + text: 'This is a test message!' From 9d5636180cda022f5992b4ada118dbcff3cc6fb0 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 09:23:57 -0800 Subject: [PATCH 02/45] Update torchrun to pytest and checkpoint directory env var --- .github/workflows/gha_workflow_llama_stack_tests.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 32b7d361e4..8263dcba79 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -48,7 +48,7 @@ on: env: # Path to model checkpoints within EFS volume - MODELS_PATH: "/data/llama" + CHECKPOINT_DIR: "/data/llama" # Path to directory to run tests from TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests" @@ -92,13 +92,13 @@ jobs: if: inputs.debug == 'true' run: | echo "========= Content of the EFS mount =============" - ls -la ${{ env.MODELS_PATH }} + ls -la ${{ env.CHECKPOINT_DIR }} - name: "Check if models exist in EFS volume" id: check_if_models_exist run: | for model_id in ${MODEL_IDS//,/ }; do - model_path="${MODELS_PATH}/${model_id}" + model_path="${CHECKPOINT_DIR}/${model_id}" if [ ! -d "${model_path}" ]; then echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." exit 1 @@ -235,9 +235,8 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if PROVIDER_CONFIG=$TESTS_PATH/${dir_name}/provider_config_example.yaml \ - torchrun $(which pytest) -s ${TESTS_PATH}/${dir_name}/${test_name} \ - --tb=short --disable-warnings --junitxml="${{ github.workspace }}/${new_file}"; then + if pytest -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta_reference and llama_3b" \ + --junitxml="${{ github.workspace }}/${new_file}"; then echo "Test passed: $test_name" else echo "Test failed: $test_name" From 02663d86b5010911dd3061046db6adf31a2f7032 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 09:25:10 -0800 Subject: [PATCH 03/45] Remove Workplace Message notification Action --- .github/workflows/gha_workflow_llama_stack_tests.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 8263dcba79..4c24d5aca1 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -326,13 +326,3 @@ jobs: uses: test-summary/action@v2 with: paths: "${{ github.workspace }}/merged-test-results.xml" - - - - name: "Workplace Message - Group" - id: workplace_message_group - if: always() - uses: florianldt/workplace-action@master - with: - access-token: ${{ secrets.WORKPLACE_ACCESS_TOKEN }} - thread-key: ${{ secrets.WORKPLACE_THREAD_KEY }} - text: 'This is a test message!' From e200a4232c6a290c58173d5b2d790041928d9820 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 09:38:42 -0800 Subject: [PATCH 04/45] Add aiosqlite as library dependency --- .github/workflows/gha_workflow_llama_stack_tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 4c24d5aca1..514260528b 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -210,7 +210,8 @@ jobs: pytest pytest_asyncio \ fairscale lm-format-enforcer \ zmq chardet pypdf \ - pandas sentence_transformers together + pandas sentence_transformers together \ + aiosqlite conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0 - name: "Installing specific manual_dispatch dependencies" From 307b2ca33eda0f1da74d77a12891ef4ff69a3ee0 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 09:58:35 -0800 Subject: [PATCH 05/45] Add RANK environment variable for torch.distributed --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 514260528b..402b04ef18 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -71,6 +71,8 @@ env: # Defines the output xml reports generated after a test is run REPORTS_GEN: "" + RANK: 0 + jobs: execute_workflow: name: Execute workload on Self-Hosted GPU k8s runner From 274b5a0d0c1634a26b03f00d8e9f2bf737d715c4 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 10:16:19 -0800 Subject: [PATCH 06/45] Add WORLD_SIZE env var for torch.distributed --- .github/workflows/gha_workflow_llama_stack_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 402b04ef18..47b7f3c10a 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -72,6 +72,7 @@ env: REPORTS_GEN: "" RANK: 0 + WORLD_SIZE: 4 jobs: execute_workflow: From f6b658e8b9cf66edfe9423c4d984cad2777216d0 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 11:12:55 -0800 Subject: [PATCH 07/45] Add torchrun specially for meta_reference --- .github/workflows/gha_workflow_llama_stack_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 47b7f3c10a..2a7aa8b3d1 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -239,8 +239,8 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if pytest -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta_reference and llama_3b" \ - --junitxml="${{ github.workspace }}/${new_file}"; then + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta_reference and llama_3b" \ + --junitxml="${{ github.workspace }}/${new_file}"; then echo "Test passed: $test_name" else echo "Test failed: $test_name" From 00829b8537733cebe96fffe9675f866c81fe05e2 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 12:15:22 -0800 Subject: [PATCH 08/45] Rename checkpoint directory env var --- .github/workflows/gha_workflow_llama_stack_tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 2a7aa8b3d1..958cc7080f 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -48,7 +48,7 @@ on: env: # Path to model checkpoints within EFS volume - CHECKPOINT_DIR: "/data/llama" + MODEL_CHECKPOINT_DIR: "/data/llama" # Path to directory to run tests from TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests" @@ -95,13 +95,13 @@ jobs: if: inputs.debug == 'true' run: | echo "========= Content of the EFS mount =============" - ls -la ${{ env.CHECKPOINT_DIR }} + ls -la ${{ env.MODEL_CHECKPOINT_DIR }} - name: "Check if models exist in EFS volume" id: check_if_models_exist run: | for model_id in ${MODEL_IDS//,/ }; do - model_path="${CHECKPOINT_DIR}/${model_id}" + model_path="${MODEL_CHECKPOINT_DIR}/${model_id}" if [ ! -d "${model_path}" ]; then echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." exit 1 From 91b37c8972b00da10aafe3a18d52c35e525e9bf8 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 6 Nov 2024 13:24:15 -0800 Subject: [PATCH 09/45] Remove old environment variables --- .github/workflows/gha_workflow_llama_stack_tests.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 958cc7080f..9094817249 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -57,10 +57,10 @@ env: MODEL_IDS: "${{ inputs.model_ids || 'Llama3.2-3B-Instruct' }}" # ID used for each test's provider config - PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" + #PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" # Defined dynamically when each test is run below - PROVIDER_CONFIG: "" + #PROVIDER_CONFIG: "" # (Unused) API key that can be manually defined for workflow dispatch API_KEY: "${{ inputs.api_key || '' }}" @@ -71,9 +71,6 @@ env: # Defines the output xml reports generated after a test is run REPORTS_GEN: "" - RANK: 0 - WORLD_SIZE: 4 - jobs: execute_workflow: name: Execute workload on Self-Hosted GPU k8s runner From 69215b6f6c13c10d668d4a81c3243e2dca0d1747 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 7 Nov 2024 06:36:27 -0800 Subject: [PATCH 10/45] Update checkpoint directory to include model --- .../gha_workflow_llama_stack_tests.yml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 9094817249..8f7a25ee4c 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -48,7 +48,7 @@ on: env: # Path to model checkpoints within EFS volume - MODEL_CHECKPOINT_DIR: "/data/llama" + MODEL_CHECKPOINT_DIR: "/data/llama/Llama3.2-3B-Instruct" # Path to directory to run tests from TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests" @@ -97,16 +97,16 @@ jobs: - name: "Check if models exist in EFS volume" id: check_if_models_exist run: | - for model_id in ${MODEL_IDS//,/ }; do - model_path="${MODEL_CHECKPOINT_DIR}/${model_id}" - if [ ! -d "${model_path}" ]; then - echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." - exit 1 - else - echo "Content of '${model_id}' model" - ls -la "${model_path}" - fi - done + #for model_id in ${MODEL_IDS//,/ }; do + # model_path="${MODEL_CHECKPOINT_DIR}/${model_id}" + # if [ ! -d "${model_path}" ]; then + # echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." + # exit 1 + # else + # echo "Content of '${model_id}' model" + # ls -la "${model_path}" + # fi + #done - name: "[DEBUG] Get runner container OS information" id: debug_os_info From 2795731434181e647179edb8d48c7f1e54deef2c Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 20 Nov 2024 14:37:27 -0800 Subject: [PATCH 11/45] Update model name for mete-reference template --- .github/workflows/gha_workflow_llama_stack_tests.yml | 3 +++ llama_stack/templates/meta-reference-gpu/meta_reference.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 8f7a25ee4c..ee3451d7bb 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -1,6 +1,9 @@ name: "Run Llama-stack Tests" on: + push: + branches: + - 'main' pull_request_target: types: ["opened"] branches: diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index f254bc9203..2408ef972c 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -81,7 +81,7 @@ def get_distribution_template() -> DistributionTemplate: "Port for the Llama Stack distribution server", ), "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", + "meta-llama/Llama3.2-3B-Instruct", "Inference model loaded into the Meta Reference server", ), "INFERENCE_CHECKPOINT_DIR": ( From 16ffe19a2048345bbee50d5d8ac8ebd1a091310a Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 20 Nov 2024 18:03:34 -0800 Subject: [PATCH 12/45] Account for if a permitted model is None --- .github/workflows/gha_workflow_llama_stack_tests.yml | 3 --- .../providers/inline/inference/meta_reference/config.py | 4 ++-- llama_stack/providers/inline/inference/vllm/config.py | 4 ++-- llama_stack/providers/utils/inference/prompt_adapter.py | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index ee3451d7bb..8f7a25ee4c 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -1,9 +1,6 @@ name: "Run Llama-stack Tests" on: - push: - branches: - - 'main' pull_request_target: types: ["opened"] branches: diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 4713e7f99d..564e5a7085 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -37,8 +37,8 @@ class MetaReferenceInferenceConfig(BaseModel): @classmethod def validate_model(cls, model: str) -> str: permitted_models = supported_inference_models() - descriptors = [m.descriptor() for m in permitted_models] - repos = [m.huggingface_repo for m in permitted_models] + descriptors = [m.descriptor() for m in permitted_models if m is not None] + repos = [m.huggingface_repo for m in permitted_models if m is not None] if model not in (descriptors + repos): model_list = "\n\t".join(repos) raise ValueError( diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index 8a95298f41..2a39d00968 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -49,8 +49,8 @@ def sample_run_config(cls): def validate_model(cls, model: str) -> str: permitted_models = supported_inference_models() - descriptors = [m.descriptor() for m in permitted_models] - repos = [m.huggingface_repo for m in permitted_models] + descriptors = [m.descriptor() for m in permitted_models if m is not None] + repos = [m.huggingface_repo for m in permitted_models if m is not None] if model not in (descriptors + repos): model_list = "\n\t".join(repos) raise ValueError( diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 6e4d0752e2..5d81bb4b1b 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -179,7 +179,7 @@ def chat_completion_request_to_messages( return request.messages allowed_models = supported_inference_models() - descriptors = [m.descriptor() for m in allowed_models] + descriptors = [m.descriptor() for m in allowed_models if m is not None] if model.descriptor() not in descriptors: cprint(f"Unsupported inference model? {model.descriptor()}", color="red") return request.messages From 490c5fb73071e20e412a14985f42b4de583be005 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 20 Nov 2024 19:17:44 -0800 Subject: [PATCH 13/45] Undo None check and temporarily move if model check before builder --- .../providers/inline/inference/meta_reference/config.py | 4 ++-- .../providers/inline/inference/meta_reference/inference.py | 4 ++-- llama_stack/providers/inline/inference/vllm/config.py | 4 ++-- llama_stack/providers/utils/inference/prompt_adapter.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 564e5a7085..4713e7f99d 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -37,8 +37,8 @@ class MetaReferenceInferenceConfig(BaseModel): @classmethod def validate_model(cls, model: str) -> str: permitted_models = supported_inference_models() - descriptors = [m.descriptor() for m in permitted_models if m is not None] - repos = [m.huggingface_repo for m in permitted_models if m is not None] + descriptors = [m.descriptor() for m in permitted_models] + repos = [m.huggingface_repo for m in permitted_models] if model not in (descriptors + repos): model_list = "\n\t".join(repos) raise ValueError( diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index e6bcd6730d..d58ecc8bdf 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -34,6 +34,8 @@ class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolP def __init__(self, config: MetaReferenceInferenceConfig) -> None: self.config = config model = resolve_model(config.model) + if model is None: + raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") ModelRegistryHelper.__init__( self, [ @@ -43,8 +45,6 @@ def __init__(self, config: MetaReferenceInferenceConfig) -> None: ) ], ) - if model is None: - raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") self.model = model # verify that the checkpoint actually is for this model lol diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index 2a39d00968..8a95298f41 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -49,8 +49,8 @@ def sample_run_config(cls): def validate_model(cls, model: str) -> str: permitted_models = supported_inference_models() - descriptors = [m.descriptor() for m in permitted_models if m is not None] - repos = [m.huggingface_repo for m in permitted_models if m is not None] + descriptors = [m.descriptor() for m in permitted_models] + repos = [m.huggingface_repo for m in permitted_models] if model not in (descriptors + repos): model_list = "\n\t".join(repos) raise ValueError( diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 5d81bb4b1b..6e4d0752e2 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -179,7 +179,7 @@ def chat_completion_request_to_messages( return request.messages allowed_models = supported_inference_models() - descriptors = [m.descriptor() for m in allowed_models if m is not None] + descriptors = [m.descriptor() for m in allowed_models] if model.descriptor() not in descriptors: cprint(f"Unsupported inference model? {model.descriptor()}", color="red") return request.messages From 318c98807cc0f8986ac8e3baf3f3deabd514fe05 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 21 Nov 2024 15:15:32 -0800 Subject: [PATCH 14/45] Pre-emptively test llama stack RC --- .github/workflows/gha_workflow_llama_stack_tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 8f7a25ee4c..1b56c9cb33 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -204,7 +204,8 @@ jobs: echo "[STEP] Installing 'llama-stack' models" pip install -U pip setuptools pip install -r requirements.txt - pip install -e . + #pip install -e . + pip install --extra-index-url https://test.pypi.org/simple/ llama-stack==0.0.54rc3 pip install -U \ torch torchvision \ pytest pytest_asyncio \ From 046eec9793308e0b4c200ad2f1ca4c5abb71438b Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 21 Nov 2024 16:35:00 -0800 Subject: [PATCH 15/45] Remove testing llama-stack RC --- .github/workflows/gha_workflow_llama_stack_tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 1b56c9cb33..8f7a25ee4c 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -204,8 +204,7 @@ jobs: echo "[STEP] Installing 'llama-stack' models" pip install -U pip setuptools pip install -r requirements.txt - #pip install -e . - pip install --extra-index-url https://test.pypi.org/simple/ llama-stack==0.0.54rc3 + pip install -e . pip install -U \ torch torchvision \ pytest pytest_asyncio \ From 496879795eb0e18cf78f55c332dc1ad89dbdb4d2 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 06:35:29 -0800 Subject: [PATCH 16/45] Dynamically change provider in tests --- .../gha_workflow_llama_stack_tests.yml | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 8f7a25ee4c..09f367dbd4 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -41,11 +41,6 @@ on: required: true default: "meta-reference" - api_key: - description: 'Provider API key' - required: false - default: "---" - env: # Path to model checkpoints within EFS volume MODEL_CHECKPOINT_DIR: "/data/llama/Llama3.2-3B-Instruct" @@ -57,13 +52,7 @@ env: MODEL_IDS: "${{ inputs.model_ids || 'Llama3.2-3B-Instruct' }}" # ID used for each test's provider config - #PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" - - # Defined dynamically when each test is run below - #PROVIDER_CONFIG: "" - - # (Unused) API key that can be manually defined for workflow dispatch - API_KEY: "${{ inputs.api_key || '' }}" + PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" # Defines which directories in TESTS_PATH to exclude from the test loop EXCLUDED_DIRS: "__pycache__" @@ -78,7 +67,7 @@ jobs: pull-requests: write defaults: run: - shell: bash # default shell to run all steps for a given job. + shell: bash runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} if: always() steps: @@ -236,7 +225,7 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta_reference and llama_3b" \ + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and llama_3b" \ --junitxml="${{ github.workspace }}/${new_file}"; then echo "Test passed: $test_name" else From 25e23a1dfe5c9d62850fd4fd669194214f161610 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 08:56:53 -0800 Subject: [PATCH 17/45] Add debug statement for PROVIDER_ID --- .../gha_workflow_llama_stack_tests.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 09f367dbd4..072650f5df 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -134,14 +134,6 @@ jobs: ############################ #### UPDATE SYSTEM PATH #### ############################ - - name: "[DEBUG] Update path: before" - id: path_update_before - if: ${{ inputs.debug == 'true' }} - run: | - echo "System path before update:" - echo "PATH=$PATH" - echo "GITHUB_PATH=$GITHUB_PATH" - - name: "Update path: execute" id: path_update_exec run: | @@ -150,14 +142,6 @@ jobs: mkdir -p ${HOME}/.local/bin echo "${HOME}/.local/bin" >> "$GITHUB_PATH" - - name: "[DEBUG] Update path: after" - id: path_update_after - if: ${{ inputs.debug == 'true' }} - run: | - echo "System path after update:" - echo "PATH=$PATH" - echo "GITHUB_PATH=$GITHUB_PATH" - ################################## #### DEPENDENCY INSTALLATIONS #### ################################## @@ -218,6 +202,7 @@ jobs: working-directory: "${{ github.workspace }}" run: | pattern="" + echo "PROVIDER_ID = ${PROVIDER_ID}" for dir in llama_stack/providers/tests/*; do if [ -d "$dir" ]; then dir_name=$(basename "$dir") From 143e91f23dcf53cb2899d06e317ddf2134885a83 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 09:18:29 -0800 Subject: [PATCH 18/45] Add manual provider back for testing --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 072650f5df..5875343114 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -210,7 +210,7 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and llama_3b" \ + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta-reference and llama_3b" \ --junitxml="${{ github.workspace }}/${new_file}"; then echo "Test passed: $test_name" else From 377896a4c55b631ba83da74fdab02ef4068ca16b Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 21 Nov 2024 16:35:00 -0800 Subject: [PATCH 19/45] Remove testing llama-stack RC --- .../providers/inline/inference/meta_reference/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 6743ca822d..07fd4af446 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -36,8 +36,6 @@ class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolP def __init__(self, config: MetaReferenceInferenceConfig) -> None: self.config = config model = resolve_model(config.model) - if model is None: - raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") ModelRegistryHelper.__init__( self, [ @@ -47,6 +45,8 @@ def __init__(self, config: MetaReferenceInferenceConfig) -> None: ) ], ) + if model is None: + raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") self.model = model # verify that the checkpoint actually is for this model lol From 1481a673657201b1f0c8b39b17be7cd2ba5be2a1 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 10:22:12 -0800 Subject: [PATCH 20/45] Test new provider name --- .../gha_workflow_llama_stack_tests.yml | 34 ++++++++++++++++--- .../inference/meta_reference/inference.py | 4 +-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 5875343114..8f7a25ee4c 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -41,6 +41,11 @@ on: required: true default: "meta-reference" + api_key: + description: 'Provider API key' + required: false + default: "---" + env: # Path to model checkpoints within EFS volume MODEL_CHECKPOINT_DIR: "/data/llama/Llama3.2-3B-Instruct" @@ -52,7 +57,13 @@ env: MODEL_IDS: "${{ inputs.model_ids || 'Llama3.2-3B-Instruct' }}" # ID used for each test's provider config - PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" + #PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" + + # Defined dynamically when each test is run below + #PROVIDER_CONFIG: "" + + # (Unused) API key that can be manually defined for workflow dispatch + API_KEY: "${{ inputs.api_key || '' }}" # Defines which directories in TESTS_PATH to exclude from the test loop EXCLUDED_DIRS: "__pycache__" @@ -67,7 +78,7 @@ jobs: pull-requests: write defaults: run: - shell: bash + shell: bash # default shell to run all steps for a given job. runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} if: always() steps: @@ -134,6 +145,14 @@ jobs: ############################ #### UPDATE SYSTEM PATH #### ############################ + - name: "[DEBUG] Update path: before" + id: path_update_before + if: ${{ inputs.debug == 'true' }} + run: | + echo "System path before update:" + echo "PATH=$PATH" + echo "GITHUB_PATH=$GITHUB_PATH" + - name: "Update path: execute" id: path_update_exec run: | @@ -142,6 +161,14 @@ jobs: mkdir -p ${HOME}/.local/bin echo "${HOME}/.local/bin" >> "$GITHUB_PATH" + - name: "[DEBUG] Update path: after" + id: path_update_after + if: ${{ inputs.debug == 'true' }} + run: | + echo "System path after update:" + echo "PATH=$PATH" + echo "GITHUB_PATH=$GITHUB_PATH" + ################################## #### DEPENDENCY INSTALLATIONS #### ################################## @@ -202,7 +229,6 @@ jobs: working-directory: "${{ github.workspace }}" run: | pattern="" - echo "PROVIDER_ID = ${PROVIDER_ID}" for dir in llama_stack/providers/tests/*; do if [ -d "$dir" ]; then dir_name=$(basename "$dir") @@ -210,7 +236,7 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta-reference and llama_3b" \ + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta_reference and llama_3b" \ --junitxml="${{ github.workspace }}/${new_file}"; then echo "Test passed: $test_name" else diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 07fd4af446..6743ca822d 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -36,6 +36,8 @@ class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolP def __init__(self, config: MetaReferenceInferenceConfig) -> None: self.config = config model = resolve_model(config.model) + if model is None: + raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") ModelRegistryHelper.__init__( self, [ @@ -45,8 +47,6 @@ def __init__(self, config: MetaReferenceInferenceConfig) -> None: ) ], ) - if model is None: - raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") self.model = model # verify that the checkpoint actually is for this model lol From 0e9ed3688d75d69993f609c36929508d5eaf59ca Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 10:58:17 -0800 Subject: [PATCH 21/45] Remove unnecessary env vars --- .../gha_workflow_llama_stack_tests.yml | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 8f7a25ee4c..e2c28fbb84 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -39,12 +39,7 @@ on: provider_id: description: 'ID of your provider' required: true - default: "meta-reference" - - api_key: - description: 'Provider API key' - required: false - default: "---" + default: "meta_reference" env: # Path to model checkpoints within EFS volume @@ -57,13 +52,7 @@ env: MODEL_IDS: "${{ inputs.model_ids || 'Llama3.2-3B-Instruct' }}" # ID used for each test's provider config - #PROVIDER_ID: "${{ inputs.provider_id || 'meta-reference' }}" - - # Defined dynamically when each test is run below - #PROVIDER_CONFIG: "" - - # (Unused) API key that can be manually defined for workflow dispatch - API_KEY: "${{ inputs.api_key || '' }}" + PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}" # Defines which directories in TESTS_PATH to exclude from the test loop EXCLUDED_DIRS: "__pycache__" @@ -78,7 +67,7 @@ jobs: pull-requests: write defaults: run: - shell: bash # default shell to run all steps for a given job. + shell: bash runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} if: always() steps: @@ -236,11 +225,8 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "meta_reference and llama_3b" \ + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and llama_3b" \ --junitxml="${{ github.workspace }}/${new_file}"; then - echo "Test passed: $test_name" - else - echo "Test failed: $test_name" fi pattern+="${new_file} " done From 9c07e0189a6c44ba4dc8928312040a4a125d4a84 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 11:16:17 -0800 Subject: [PATCH 22/45] Fix syntax error --- .github/workflows/gha_workflow_llama_stack_tests.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index e2c28fbb84..a3121226f9 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -227,6 +227,9 @@ jobs: new_file="result-${dir_name}-${test_name}.xml" if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and llama_3b" \ --junitxml="${{ github.workspace }}/${new_file}"; then + echo "Test passed" + else + echo "Test failed" fi pattern+="${new_file} " done @@ -299,7 +302,7 @@ jobs: working-directory: "${{ github.workspace }}" if: github.event_name == 'workflow_dispatch' run: | - echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}" + echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}" # (Optional) Add more tests here. # Merge test results with 'merged-test-results.xml' from above. From 7f5e0dd3db97db4546adb201235d48367c8da286 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 12:05:05 -0800 Subject: [PATCH 23/45] Refactor test run to support shorthand model names --- .../gha_workflow_llama_stack_tests.yml | 101 +++++++++++++----- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index a3121226f9..1165215f09 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -31,28 +31,45 @@ on: required: true default: "0" - model_ids: - description: 'Comma separated list of models to test' - required: true - default: "Llama3.2-3B-Instruct" - provider_id: description: 'ID of your provider' required: true default: "meta_reference" + model_id: + description: 'Shorthand name for model ID (llama_3b or llama_8b)' + required: true + default: "llama_3b" + + model_override_3b: + description: 'Specify manual override for the shorthand model' + required: false + default: "Llama-3.2-3B-Instruct" + + model_override_8b: + description: 'Specify manual override for the shorthand model' + required: false + default: "Llama-3.1-8B-Instruct" + env: + # ID used for each test's provider config + PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}" + # Path to model checkpoints within EFS volume - MODEL_CHECKPOINT_DIR: "/data/llama/Llama3.2-3B-Instruct" + MODEL_CHECKPOINT_DIR: "/data/llama/" # Path to directory to run tests from TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests" - # List of models that are to be tested - MODEL_IDS: "${{ inputs.model_ids || 'Llama3.2-3B-Instruct' }}" + # Keep track of a list of model IDs that are valid to use within pytest fixture marks + AVAILABLE_MODEL_IDs: "llama_3b llama_8b" - # ID used for each test's provider config - PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}" + # Shorthand name for model ID, used in pytest fixture marks + MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}" + + # Set the llama 3b / 8b override for models if desired, else use the default. + LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama-3.2-3B-Instruct' }}" + LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama-3.1-8B-Instruct' }}" # Defines which directories in TESTS_PATH to exclude from the test loop EXCLUDED_DIRS: "__pycache__" @@ -83,26 +100,41 @@ jobs: echo "========= Content of the EFS mount =============" ls -la ${{ env.MODEL_CHECKPOINT_DIR }} - - name: "Check if models exist in EFS volume" - id: check_if_models_exist - run: | - #for model_id in ${MODEL_IDS//,/ }; do - # model_path="${MODEL_CHECKPOINT_DIR}/${model_id}" - # if [ ! -d "${model_path}" ]; then - # echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." - # exit 1 - # else - # echo "Content of '${model_id}' model" - # ls -la "${model_path}" - # fi - #done - - name: "[DEBUG] Get runner container OS information" id: debug_os_info if: ${{ inputs.debug == 'true' }} run: | cat /etc/os-release + ############################ + #### MODEL INPUT CHECKS #### + ############################ + + - name: "Check if env.model_id is valid" + id: check_model_id + run: | + if [[ " ${AVAILABLE_MODEL_IDs[@]} " =~ " ${MODEL_ID} " ]]; then + echo "Model ID ${MODEL_ID} is valid" + else + echo "Model ID ${MODEL_ID} is invalid, Terminating workflow." + exit 1 + fi + + - name: "Check if models exist in EFS volume" + id: check_if_models_exist + run: | + MODEL_IDS="${LLAMA_3B_OVERRIDE},${LLAMA_8B_OVERRIDE}" + for model_id in ${MODEL_IDS//,/ }; do + model_path="${MODEL_CHECKPOINT_DIR}/${model_id}" + if [ ! -d "${model_path}" ]; then + echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." + exit 1 + else + echo "Content of '${model_id}' model" + ls -la "${model_path}" + fi + done + ####################### #### CODE CHECKOUT #### ####################### @@ -158,6 +190,21 @@ jobs: echo "PATH=$PATH" echo "GITHUB_PATH=$GITHUB_PATH" + ##################################### + #### UPDATE CHECKPOINT DIRECTORY #### + ##################################### + - name: "Update checkpoint directory" + id: checkpoint_update + run: | + if ${MODEL_ID} == "llama_3b" ; then + echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" >> "$GITHUB_ENV" + elif ${MODEL_ID} == "llama_8b" ; then + echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_8B_OVERRIDE" >> "$GITHUB_ENV" + else + echo "MODEL_ID is not valid, Terminating workflow." + exit 1 + fi + ################################## #### DEPENDENCY INSTALLATIONS #### ################################## @@ -225,11 +272,11 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and llama_3b" \ + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \ --junitxml="${{ github.workspace }}/${new_file}"; then - echo "Test passed" + echo "Ran test: ${test_name}" else - echo "Test failed" + echo "Did NOT run test: ${test_name}" fi pattern+="${new_file} " done From d1d8f859e6a945def8aad83b318c8b787e9a702b Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 12:51:34 -0800 Subject: [PATCH 24/45] Update checkpointd directory setting --- .../gha_workflow_llama_stack_tests.yml | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 1165215f09..8e5731d1b6 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -56,7 +56,7 @@ env: PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}" # Path to model checkpoints within EFS volume - MODEL_CHECKPOINT_DIR: "/data/llama/" + MODEL_CHECKPOINT_DIR: "/data/llama" # Path to directory to run tests from TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests" @@ -114,27 +114,12 @@ jobs: id: check_model_id run: | if [[ " ${AVAILABLE_MODEL_IDs[@]} " =~ " ${MODEL_ID} " ]]; then - echo "Model ID ${MODEL_ID} is valid" + echo "Model ID '${MODEL_ID}' is valid" else - echo "Model ID ${MODEL_ID} is invalid, Terminating workflow." + echo "Model ID '${MODEL_ID}' is invalid, Terminating workflow." exit 1 fi - - name: "Check if models exist in EFS volume" - id: check_if_models_exist - run: | - MODEL_IDS="${LLAMA_3B_OVERRIDE},${LLAMA_8B_OVERRIDE}" - for model_id in ${MODEL_IDS//,/ }; do - model_path="${MODEL_CHECKPOINT_DIR}/${model_id}" - if [ ! -d "${model_path}" ]; then - echo "Model '${model_id}' does not exist in mounted EFS volume, Terminating workflow." - exit 1 - else - echo "Content of '${model_id}' model" - ls -la "${model_path}" - fi - done - ####################### #### CODE CHECKOUT #### ####################### @@ -196,12 +181,12 @@ jobs: - name: "Update checkpoint directory" id: checkpoint_update run: | - if ${MODEL_ID} == "llama_3b" ; then + if ${MODEL_ID} == "llama_3b" && [ -d "${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" ]; then echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" >> "$GITHUB_ENV" - elif ${MODEL_ID} == "llama_8b" ; then + elif ${MODEL_ID} == "llama_8b" && [ -d "${MODEL_CHECKPOINT_DIR}/$LLAMA_8B_OVERRIDE" ]; then echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_8B_OVERRIDE" >> "$GITHUB_ENV" else - echo "MODEL_ID is not valid, Terminating workflow." + echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing, Terminating workflow." exit 1 fi From cbd69d06c366b88dc105e57695ca72a3c53e1652 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 13:52:58 -0800 Subject: [PATCH 25/45] Clean up checkpoint directory setting --- .github/workflows/gha_workflow_llama_stack_tests.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 8e5731d1b6..e89d70c3a6 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -44,12 +44,12 @@ on: model_override_3b: description: 'Specify manual override for the shorthand model' required: false - default: "Llama-3.2-3B-Instruct" + default: "Llama3.2-3B-Instruct" model_override_8b: description: 'Specify manual override for the shorthand model' required: false - default: "Llama-3.1-8B-Instruct" + default: "Llama3.1-8B-Instruct" env: # ID used for each test's provider config @@ -68,8 +68,8 @@ env: MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}" # Set the llama 3b / 8b override for models if desired, else use the default. - LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama-3.2-3B-Instruct' }}" - LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama-3.1-8B-Instruct' }}" + LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}" + LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}" # Defines which directories in TESTS_PATH to exclude from the test loop EXCLUDED_DIRS: "__pycache__" @@ -181,6 +181,7 @@ jobs: - name: "Update checkpoint directory" id: checkpoint_update run: | + echo "Checkpoint directory: ${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" if ${MODEL_ID} == "llama_3b" && [ -d "${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" ]; then echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" >> "$GITHUB_ENV" elif ${MODEL_ID} == "llama_8b" && [ -d "${MODEL_CHECKPOINT_DIR}/$LLAMA_8B_OVERRIDE" ]; then From 8f60a3a55d96ec8ee42a18a01f93ed8e0b4e89c8 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 14:41:20 -0800 Subject: [PATCH 26/45] Clean up job names --- .../gha_workflow_llama_stack_tests.yml | 46 ++++++------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index e89d70c3a6..5f5fb56d5f 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -37,7 +37,7 @@ on: default: "meta_reference" model_id: - description: 'Shorthand name for model ID (llama_3b or llama_8b)' + description: 'Shorthand name for target model ID (llama_3b or llama_8b)' required: true default: "llama_3b" @@ -67,7 +67,7 @@ env: # Shorthand name for model ID, used in pytest fixture marks MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}" - # Set the llama 3b / 8b override for models if desired, else use the default. + # Override the `llama_3b` / `llama_8b' models, else use the default. LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}" LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}" @@ -151,14 +151,6 @@ jobs: ############################ #### UPDATE SYSTEM PATH #### ############################ - - name: "[DEBUG] Update path: before" - id: path_update_before - if: ${{ inputs.debug == 'true' }} - run: | - echo "System path before update:" - echo "PATH=$PATH" - echo "GITHUB_PATH=$GITHUB_PATH" - - name: "Update path: execute" id: path_update_exec run: | @@ -167,14 +159,6 @@ jobs: mkdir -p ${HOME}/.local/bin echo "${HOME}/.local/bin" >> "$GITHUB_PATH" - - name: "[DEBUG] Update path: after" - id: path_update_after - if: ${{ inputs.debug == 'true' }} - run: | - echo "System path after update:" - echo "PATH=$PATH" - echo "GITHUB_PATH=$GITHUB_PATH" - ##################################### #### UPDATE CHECKPOINT DIRECTORY #### ##################################### @@ -182,10 +166,10 @@ jobs: id: checkpoint_update run: | echo "Checkpoint directory: ${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" - if ${MODEL_ID} == "llama_3b" && [ -d "${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" ]; then - echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE" >> "$GITHUB_ENV" - elif ${MODEL_ID} == "llama_8b" && [ -d "${MODEL_CHECKPOINT_DIR}/$LLAMA_8B_OVERRIDE" ]; then - echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/$LLAMA_8B_OVERRIDE" >> "$GITHUB_ENV" + if [ "${MODEL_ID}" = "llama_3b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" ]; then + echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" >> "$GITHUB_ENV" + elif [ "${MODEL_ID}" = "llama_8b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" ]; then + echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" >> "$GITHUB_ENV" else echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing, Terminating workflow." exit 1 @@ -220,7 +204,7 @@ jobs: run: | sudo npm install -g junit-merge - - name: "Installing 'llama-stack' dependencies" + - name: "Installing pip dependencies" id: install_pip_generic run: | echo "[STEP] Installing 'llama-stack' models" @@ -234,20 +218,16 @@ jobs: zmq chardet pypdf \ pandas sentence_transformers together \ aiosqlite - conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0 - - - name: "Installing specific manual_dispatch dependencies" - id: manual_install_pip - if: github.event_name == 'workflow_dispatch' + - name: "Installing packages with conda" + id: install_conda_generic run: | - echo "[STEP] Installing specific dependencies for manual dispatch workflows" - # N.A. + conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0 ############################################################# #### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH #### ############################################################# - - name: "Manual - Run Tests: Loop" - id: manual_run_tests_loop + - name: "Run Tests: Loop" + id: run_tests_loop working-directory: "${{ github.workspace }}" run: | pattern="" @@ -271,7 +251,7 @@ jobs: done echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV" - - name: "Manual - Test Summary: Merge" + - name: "Test Summary: Merge" id: test_summary_merge if: always() working-directory: "${{ github.workspace }}" From 071710426d8d972a40f8168066a3740d623f08f4 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Fri, 22 Nov 2024 15:26:32 -0800 Subject: [PATCH 27/45] Try shortening test formula --- .github/workflows/gha_workflow_llama_stack_tests.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 5f5fb56d5f..d68d6801dc 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -229,6 +229,7 @@ jobs: - name: "Run Tests: Loop" id: run_tests_loop working-directory: "${{ github.workspace }}" + if: github.event_name == 'pull_request_target' run: | pattern="" for dir in llama_stack/providers/tests/*; do @@ -253,8 +254,8 @@ jobs: - name: "Test Summary: Merge" id: test_summary_merge - if: always() working-directory: "${{ github.workspace }}" + if: github.event_name == 'pull_request_target' run: | echo "Merging the following test result files: ${REPORTS_GEN}" # Defaults to merging them into 'merged-test-results.xml' @@ -317,7 +318,7 @@ jobs: run: | echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}" # (Optional) Add more tests here. - + pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml" # Merge test results with 'merged-test-results.xml' from above. # junit-merge merged-test-results.xml From e428b8239816a1352b55ed359e8ed1f63bf115b9 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 25 Nov 2024 06:44:55 -0800 Subject: [PATCH 28/45] Revert test formula --- .github/workflows/gha_workflow_llama_stack_tests.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index d68d6801dc..6eb2700dc9 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -229,7 +229,6 @@ jobs: - name: "Run Tests: Loop" id: run_tests_loop working-directory: "${{ github.workspace }}" - if: github.event_name == 'pull_request_target' run: | pattern="" for dir in llama_stack/providers/tests/*; do @@ -255,7 +254,6 @@ jobs: - name: "Test Summary: Merge" id: test_summary_merge working-directory: "${{ github.workspace }}" - if: github.event_name == 'pull_request_target' run: | echo "Merging the following test result files: ${REPORTS_GEN}" # Defaults to merging them into 'merged-test-results.xml' @@ -317,8 +315,12 @@ jobs: if: github.event_name == 'workflow_dispatch' run: | echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}" + + #TODO Use this when collection errors are resolved + # pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml" + # (Optional) Add more tests here. - pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml" + # Merge test results with 'merged-test-results.xml' from above. # junit-merge merged-test-results.xml From cd0c80d61f0573c1ba90e60c7e8aa0a93291f637 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 25 Nov 2024 07:13:54 -0800 Subject: [PATCH 29/45] Add env vars debug printout --- .../gha_workflow_llama_stack_tests.yml | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 6eb2700dc9..af9712be57 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -42,12 +42,12 @@ on: default: "llama_3b" model_override_3b: - description: 'Specify manual override for the shorthand model' + description: 'Specify shorthand model for ' required: false default: "Llama3.2-3B-Instruct" model_override_8b: - description: 'Specify manual override for the shorthand model' + description: 'Specify shorthand model for ' required: false default: "Llama3.1-8B-Instruct" @@ -106,6 +106,19 @@ jobs: run: | cat /etc/os-release + - name: "[DEBUG] Print environment variables" + id: debug_env_vars + if: ${{ inputs.debug == 'true' }} + run: | + echo "PROVIDER_ID = ${PROVIDER_ID}" + echo "MODEL_CHECKPOINT_DIR = ${MODEL_CHECKPOINT_DIR}" + echo "AVAILABLE_MODEL_IDs = ${AVAILABLE_MODEL_IDs}" + echo "MODEL_ID = ${MODEL_ID}" + echo "LLAMA_3B_OVERRIDE = ${LLAMA_3B_OVERRIDE}" + echo "LLAMA_8B_OVERRIDE = ${LLAMA_8B_OVERRIDE}" + echo "EXCLUDED_DIRS = ${EXCLUDED_DIRS}" + echo "REPORTS_GEN = ${REPORTS_GEN}" + ############################ #### MODEL INPUT CHECKS #### ############################ @@ -114,9 +127,9 @@ jobs: id: check_model_id run: | if [[ " ${AVAILABLE_MODEL_IDs[@]} " =~ " ${MODEL_ID} " ]]; then - echo "Model ID '${MODEL_ID}' is valid" + echo "Model ID '${MODEL_ID}' is valid." else - echo "Model ID '${MODEL_ID}' is invalid, Terminating workflow." + echo "Model ID '${MODEL_ID}' is invalid. Terminating workflow." exit 1 fi @@ -171,7 +184,7 @@ jobs: elif [ "${MODEL_ID}" = "llama_8b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" ]; then echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" >> "$GITHUB_ENV" else - echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing, Terminating workflow." + echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing. Terminating workflow." exit 1 fi From 1912ff2341bc54e595fb651c3bff3fe2337da0d5 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 25 Nov 2024 07:30:51 -0800 Subject: [PATCH 30/45] Temporarily make repo point to fork for PR testing --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index af9712be57..1c4f84dc37 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -85,7 +85,7 @@ jobs: defaults: run: shell: bash - runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} + runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-fork-gha-runner-gpu' }} if: always() steps: From ac1974353cbe4461d7c39dd5fd228cf92c9b8f4e Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 25 Nov 2024 07:46:09 -0800 Subject: [PATCH 31/45] Revert fork target back to main --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 1c4f84dc37..af9712be57 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -85,7 +85,7 @@ jobs: defaults: run: shell: bash - runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-fork-gha-runner-gpu' }} + runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} if: always() steps: From 8d83759caf612930a98855586292eebc34a7ea6b Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 25 Nov 2024 08:10:04 -0800 Subject: [PATCH 32/45] Add MODEL_CHECKPOINT_DIR check after update --- .github/workflows/gha_workflow_llama_stack_tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index af9712be57..d957e71966 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -188,6 +188,12 @@ jobs: exit 1 fi + - name: "[DEBUG] Checkpoint update check" + id: debug_checkpoint_update + if: ${{ inputs.debug == 'true' }} + run: | + echo "MODEL_CHECKPOINT_DIR (after update) = ${MODEL_CHECKPOINT_DIR}" + ################################## #### DEPENDENCY INSTALLATIONS #### ################################## From bbea9bccf1f92f23305dcacd12c4ed33b5c10631 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 25 Nov 2024 09:20:27 -0800 Subject: [PATCH 33/45] Revert provider / inference config back to mainline --- .../providers/inline/inference/meta_reference/inference.py | 4 ++-- llama_stack/templates/meta-reference-gpu/meta_reference.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 6743ca822d..07fd4af446 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -36,8 +36,6 @@ class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolP def __init__(self, config: MetaReferenceInferenceConfig) -> None: self.config = config model = resolve_model(config.model) - if model is None: - raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") ModelRegistryHelper.__init__( self, [ @@ -47,6 +45,8 @@ def __init__(self, config: MetaReferenceInferenceConfig) -> None: ) ], ) + if model is None: + raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") self.model = model # verify that the checkpoint actually is for this model lol diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 2408ef972c..f254bc9203 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -81,7 +81,7 @@ def get_distribution_template() -> DistributionTemplate: "Port for the Llama Stack distribution server", ), "INFERENCE_MODEL": ( - "meta-llama/Llama3.2-3B-Instruct", + "meta-llama/Llama-3.2-3B-Instruct", "Inference model loaded into the Meta Reference server", ), "INFERENCE_CHECKPOINT_DIR": ( From dc4e755bdc96f1d3143182c980aad19f86142850 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 4 Dec 2024 08:04:04 -0800 Subject: [PATCH 34/45] Temporarily disable PR runs until manual testing is complete --- .../gha_workflow_llama_stack_tests.yml | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index d957e71966..89e5edf716 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -1,13 +1,15 @@ name: "Run Llama-stack Tests" on: - pull_request_target: - types: ["opened"] - branches: - - 'main' - paths: - - 'llama_stack/**/*.py' - - 'tests/**/*.py' + #### Temporarily disable PR runs until tests run as intended within mainline. + #TODO Add this back. + #pull_request_target: + # types: ["opened"] + # branches: + # - 'main' + # paths: + # - 'llama_stack/**/*.py' + # - 'tests/**/*.py' workflow_dispatch: inputs: @@ -16,8 +18,8 @@ on: required: true default: "llama-stack-gha-runner-gpu" - branch: - description: "Branch to checkout" + checkout_reference: + description: "The branch, tag, or SHA to checkout" required: true default: "main" From 7e46e9339c292428aff1df384ad64914c8b24d25 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 4 Dec 2024 11:57:25 -0800 Subject: [PATCH 35/45] Update cicd branch to test removing ollama dependency --- .github/workflows/gha_workflow_llama_stack_tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 89e5edf716..e895169dd0 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -206,10 +206,10 @@ jobs: sudo apt update -y sudo apt install -y python3 python3-pip npm wget - - name: "Installing packages with 'curl'" - id: install_curl - run: | - curl -fsSL https://ollama.com/install.sh | sh + #- name: "Installing packages with 'curl'" + # id: install_curl + # run: | + # curl -fsSL https://ollama.com/install.sh | sh - name: "Installing packages with 'wget'" id: install_wget From d2cef848f326b876470c63b9fbc397ff550a05b9 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 8 Jan 2025 11:35:15 -0800 Subject: [PATCH 36/45] Fix renaming of checkout_reference from branch --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index e895169dd0..aeb083f985 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -142,7 +142,7 @@ jobs: id: checkout_repo uses: actions/checkout@v4 with: - ref: ${{ inputs.branch }} + ref: ${{ inputs.checkout_reference }} - name: "[DEBUG] Content of the repository after checkout" id: debug_content_after_checkout From 5c14fdd64b254b7e18f6a0ed23c53c7be90bf581 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Wed, 8 Jan 2025 12:08:34 -0800 Subject: [PATCH 37/45] Test new import for SqliteKVStoreConfig --- llama_stack/providers/tests/agents/test_persistence.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/tests/agents/test_persistence.py b/llama_stack/providers/tests/agents/test_persistence.py index 38eb7de55a..e6b1470efb 100644 --- a/llama_stack/providers/tests/agents/test_persistence.py +++ b/llama_stack/providers/tests/agents/test_persistence.py @@ -9,7 +9,9 @@ from llama_stack.apis.agents import AgentConfig, Turn from llama_stack.apis.inference import SamplingParams, UserMessage from llama_stack.providers.datatypes import Api -from llama_stack.providers.utils.kvstore import kvstore_impl, SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig + from .fixtures import pick_inference_model from .utils import create_agent_session From 5942dffa0f8ec968aeb82584e12d84728dbb90fa Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 9 Jan 2025 09:06:16 -0800 Subject: [PATCH 38/45] Fix other ImportErrors --- llama_stack/providers/tests/inference/test_embeddings.py | 3 ++- .../providers/tests/post_training/test_post_training.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/tests/inference/test_embeddings.py b/llama_stack/providers/tests/inference/test_embeddings.py index bf09896c14..ca0276ed67 100644 --- a/llama_stack/providers/tests/inference/test_embeddings.py +++ b/llama_stack/providers/tests/inference/test_embeddings.py @@ -6,7 +6,8 @@ import pytest -from llama_stack.apis.inference import EmbeddingsResponse, ModelType +from llama_stack.apis.inference import EmbeddingsResponse +from llama_stack.apis.models import ModelType # How to run this test: # pytest -v -s llama_stack/providers/tests/inference/test_embeddings.py diff --git a/llama_stack/providers/tests/post_training/test_post_training.py b/llama_stack/providers/tests/post_training/test_post_training.py index 0645cd5556..0c58c1fa00 100644 --- a/llama_stack/providers/tests/post_training/test_post_training.py +++ b/llama_stack/providers/tests/post_training/test_post_training.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import pytest -from llama_stack.apis.common.type_system import JobStatus +from llama_stack.apis.common.job_types import JobStatus from llama_stack.apis.post_training import ( Checkpoint, DataConfig, From 488b5d2c7fb8d56ae348ed1b94f771ad59d4c937 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 9 Jan 2025 09:18:04 -0800 Subject: [PATCH 39/45] Update actions artifact versioning --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index aeb083f985..43388be464 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -310,7 +310,7 @@ jobs: - name: "PR - Upload Test Summary" id: pr_test_summary_upload if: github.event_name == 'pull_request_target' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-summary path: test-summary.md From df072c84933fcf8557d40cc596462c12545cf75d Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 9 Jan 2025 11:10:03 -0800 Subject: [PATCH 40/45] Schedule manual dispatch of workflow via cron scheduling --- .github/workflows/gha_workflow_llama_stack_tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 43388be464..242d1b85b5 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -11,6 +11,10 @@ on: # - 'llama_stack/**/*.py' # - 'tests/**/*.py' + # Schedule cron job at 2:30 am EST every day of the week. + # Will run a manual workflow off of 'main' branch. + schedule: + - cron: '30 7 * * *' workflow_dispatch: inputs: runner: From fa99fbfb39b04712025c53da10fda5c11c41fa46 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 9 Jan 2025 11:15:37 -0800 Subject: [PATCH 41/45] Remove ollama dependency --- .github/workflows/gha_workflow_llama_stack_tests.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 242d1b85b5..9dc6b19a4c 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -210,11 +210,6 @@ jobs: sudo apt update -y sudo apt install -y python3 python3-pip npm wget - #- name: "Installing packages with 'curl'" - # id: install_curl - # run: | - # curl -fsSL https://ollama.com/install.sh | sh - - name: "Installing packages with 'wget'" id: install_wget run: | From e5da50fcc30a8c1f5baa774967915d2956678626 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 9 Jan 2025 14:47:58 -0800 Subject: [PATCH 42/45] Modify test call for test_model_registration.py within workflow --- .github/workflows/gha_workflow_llama_stack_tests.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 9dc6b19a4c..b2b59608fd 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -258,7 +258,17 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \ + #TODO + # Add if test_name = test_model_registration.py then echo "hello" + if [ "$test_name" = "test_model_registration.py" ]; then + echo "Found test_model_registration.py" + if torchrun $(which pytest) -v -s -k "${PROVIDER_ID}" --inference-model="${MODEL_ID}" \ + --junitxml="${{ github.workspace }}/${new_file}"; then + echo "Ran the test_model_registration.py" + else + echo "Did NOT run the test_model_registration.py" + fi + elif torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \ --junitxml="${{ github.workspace }}/${new_file}"; then echo "Ran test: ${test_name}" else From b61108c3f0939378f407224652c8b944d3bb0950 Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Thu, 9 Jan 2025 17:13:01 -0800 Subject: [PATCH 43/45] Revert workflow back to original status for PR#737 --- .github/workflows/gha_workflow_llama_stack_tests.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index b2b59608fd..9dc6b19a4c 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -258,17 +258,7 @@ jobs: for file in "$dir"/test_*.py; do test_name=$(basename "$file") new_file="result-${dir_name}-${test_name}.xml" - #TODO - # Add if test_name = test_model_registration.py then echo "hello" - if [ "$test_name" = "test_model_registration.py" ]; then - echo "Found test_model_registration.py" - if torchrun $(which pytest) -v -s -k "${PROVIDER_ID}" --inference-model="${MODEL_ID}" \ - --junitxml="${{ github.workspace }}/${new_file}"; then - echo "Ran the test_model_registration.py" - else - echo "Did NOT run the test_model_registration.py" - fi - elif torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \ + if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \ --junitxml="${{ github.workspace }}/${new_file}"; then echo "Ran test: ${test_name}" else From 562900de5f5969231cf77aded9eae003f873ed4f Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 27 Jan 2025 15:03:37 -0800 Subject: [PATCH 44/45] Add tests for schedule job --- .../workflows/gha_workflow_llama_stack_tests.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 9dc6b19a4c..b6a4f9f52a 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -91,7 +91,7 @@ jobs: defaults: run: shell: bash - runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }} + runs-on: ${{ inputs.runner != '' && inputs.runner || github.repository_owner == 'meta-llama' && 'llama-stack-gha-runner-gpu' || 'llama-stack-fork-gha-runner-gpu' }} if: always() steps: @@ -233,7 +233,7 @@ jobs: pip install -e . pip install -U \ torch torchvision \ - pytest pytest_asyncio \ + pytest pytest_asyncio pytest_html \ fairscale lm-format-enforcer \ zmq chardet pypdf \ pandas sentence_transformers together \ @@ -332,7 +332,7 @@ jobs: - name: "Manual - Run Tests: Prep" id: manual_run_tests working-directory: "${{ github.workspace }}" - if: github.event_name == 'workflow_dispatch' + if: contains(fromJSON('["workflow_dispatch", "schedule"]'), github.event_name) run: | echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}" @@ -344,11 +344,16 @@ jobs: # Merge test results with 'merged-test-results.xml' from above. # junit-merge merged-test-results.xml + + ##################### + #### ALL TESTING #### + ##################### + #### Create test summary #### - name: "Manual - Test Summary" id: manual_test_summary - if: always() && github.event_name == 'workflow_dispatch' + if: always() uses: test-summary/action@v2 with: paths: "${{ github.workspace }}/merged-test-results.xml" From 9fb3ef791df89f49bb767fcccd177bff0b06b4de Mon Sep 17 00:00:00 2001 From: Connor Hack Date: Mon, 27 Jan 2025 15:53:20 -0800 Subject: [PATCH 45/45] Update library dependencies --- .github/workflows/gha_workflow_llama_stack_tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index b6a4f9f52a..99378e61c3 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -237,7 +237,8 @@ jobs: fairscale lm-format-enforcer \ zmq chardet pypdf \ pandas sentence_transformers together \ - aiosqlite + aiosqlite \ + openai - name: "Installing packages with conda" id: install_conda_generic run: |