diff --git a/.github/workflows/eval_test.yaml b/.github/workflows/ci_eval.yaml
similarity index 95%
rename from .github/workflows/eval_test.yaml
rename to .github/workflows/ci_eval.yaml
index ec3a0f3ca..d3681d95a 100644
--- a/.github/workflows/eval_test.yaml
+++ b/.github/workflows/ci_eval.yaml
@@ -55,7 +55,7 @@ jobs:
           pip install --no-compile -r pytorch-cpu-requirements.txt
           pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
             -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
-          pip install --no-compile -r requirements.txt -e sharktank/
+          pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
 
       - name: Run perplexity test
         run: pytest sharktank/tests/evaluate/perplexity_test.py --longrun
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 8b3f50944..a005acf14 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -86,7 +86,7 @@ jobs:
           pip install --no-compile -r pytorch-rocm-requirements.txt
           pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
             -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
-          pip install --no-compile -r requirements.txt -e sharktank/ shortfin/
+          pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/ shortfin/
 
       - name: Run punet tests
         run: |
diff --git a/sharktank/conftest.py b/sharktank/conftest.py
index 21498d728..040775409 100644
--- a/sharktank/conftest.py
+++ b/sharktank/conftest.py
@@ -128,14 +128,6 @@ def pytest_addoption(parser):
         help="Llama3.1 8B & 405B model baseline perplexity scores json",
     )
 
-    parser.addoption(
-        "--current-perplexity-scores-json",
-        type=Path,
-        action="store",
-        default="sharktank/tests/evaluate/current_perplexity_scores.json",
-        help="Llama3.1 8B & 405B model current perplexity scores json",
-    )
-
 
 def set_fixture_from_cli_option(
     request: FixtureRequest,
@@ -200,7 +192,4 @@ def get_model_path(request: FixtureRequest):
     model_path["baseline_perplexity_score_json"] = set_fixture_from_cli_option(
         request, "--baseline-perplexity-score-json", "baseline_perplexity_score_json"
     )
-    model_path["current_perplexity_scores_json"] = set_fixture_from_cli_option(
-        request, "--current-perplexity-scores-json", "current_perplexity_scores_json"
-    )
     return model_path
diff --git a/sharktank/tests/evaluate/perplexity_test.py b/sharktank/tests/evaluate/perplexity_test.py
index a9287ea3b..faf3a263f 100644
--- a/sharktank/tests/evaluate/perplexity_test.py
+++ b/sharktank/tests/evaluate/perplexity_test.py
@@ -23,11 +23,6 @@ def setUp(self):
         with open(self.baseline_perplexity_score_json, "r") as f:
             self.baseline_perplexity = json.load(f)
 
-    def save_perplexity(self, model_name: str, current_perplexity: dict):
-        self.current_perplexity_all = {model_name: current_perplexity}
-        with open(self.current_perplexity_scores_json, "w") as f:
-            json.dump(self.current_perplexity_all, f)
-
     @longrun
     def test_llama3_8B_f16_decomposed(self):
@@ -43,8 +38,6 @@ def test_llama3_8B_f16_decomposed(self):
             ]
         )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -71,8 +64,6 @@ def test_llama3_8B_f16_non_decomposed(self):
             ]
         )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -98,8 +89,6 @@ def test_llama3_8B_fp8_decomposed(self):
             ]
        )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -126,8 +115,6 @@ def test_llama3_8B_fp8_non_decomposed(self):
             ]
         )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -151,8 +138,6 @@ def test_llama3_405B_f16_decomposed(self):
             ]
         )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -180,8 +165,6 @@ def test_llama3_405B_f16_non_decomposed(self):
             ]
         )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -208,8 +191,6 @@ def test_llama3_405B_fp8_decomposed(self):
             ]
         )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
@@ -237,8 +218,6 @@ def test_llama3_405B_fp8_non_decomposed(self):
             ]
        )
 
-        self.save_perplexity(model_name, current_perplexity)
-
         self.assertAlmostEqual(
             baseline_perplexity["mean_perplexity"],
             current_perplexity["mean_perplexity"],
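
After this diff, the perplexity tests are baseline-only: each test computes perplexity, asserts the mean against the checked-in baseline JSON, and writes nothing back to disk. A minimal sketch of the retained test shape follows; it is not the repository's exact code. The run_perplexity helper, the model key, the baseline filename, and the delta are illustrative stand-ins (the real tests drive perplexity_vmfb.main([...]) with per-model arguments, and the diff above truncates before the actual delta values).

    # Minimal sketch of the baseline-only comparison pattern, assuming
    # stand-in names; see sharktank/tests/evaluate/perplexity_test.py
    # for the real tests.
    import json
    import unittest


    def run_perplexity(model_name: str) -> dict:
        # Stand-in for perplexity_vmfb.main([...]); the real call returns
        # a dict containing a "mean_perplexity" entry.
        return {"mean_perplexity": 6.0}


    class PerplexityTestSketch(unittest.TestCase):
        def setUp(self):
            # Baselines come from the JSON passed via
            # --baseline-perplexity-score-json; the --current-perplexity-scores-json
            # option and the save_perplexity() helper are gone after this diff,
            # so the test no longer writes current scores anywhere.
            with open("baseline_perplexity_scores.json", "r") as f:
                self.baseline_perplexity = json.load(f)

        def test_llama3_8B_f16_decomposed(self):
            model_name = "llama3_8B_f16_decomposed"  # hypothetical key
            baseline = self.baseline_perplexity[model_name]
            current = run_perplexity(model_name)
            # Compare mean perplexity only, within a tolerance.
            self.assertAlmostEqual(
                baseline["mean_perplexity"],
                current["mean_perplexity"],
                delta=0.5,  # illustrative tolerance, not the repo's value
            )

Dropping the write-back also explains the conftest.py changes: with no current-scores file produced, the CLI option and fixture plumbing for it can be deleted in the same change.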