diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 000000000..50cda2c13
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,38 @@
+name: Test
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    # Skip the job when the head commit message contains 'ci skip'.
+    if: "!contains(github.event.head_commit.message, 'ci skip')"
+    strategy:
+      matrix:
+        os: [macos-latest, windows-latest, ubuntu-latest]
+    steps:
+      - name: Cancel previous run
+        uses: styfle/cancel-workflow-action@0.11.0
+        with:
+          access_token: ${{ github.token }}
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          # Quoted so YAML keeps the version a string instead of a float.
+          python-version: "3.9"
+      - name: Upgrade pip
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+      - name: Install dependencies
+        run: |
+          python -m pip install -r requirements.txt
+      - name: Test
+        # NOTE(review): repository secrets are presumably not exposed to
+        # pull_request runs from forks, so this step may fail there; confirm.
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_ORGANIZATION: ${{ secrets.OPENAI_ORGANIZATION }}
+        run: |
+          python benchmark.py --task tasksolving/mgsm/gpt-3.5 --dataset_path data/mgsm/test_sample.jsonl --overwrite --output_path ci_smoke_test_output
+          python evaluate_math.py --path ci_smoke_test_output/results.jsonl --ci_smoke_test
diff --git a/data/mgsm/test_sample.jsonl b/data/mgsm/test_sample.jsonl
new file mode 100644
index 000000000..bdd20eb46
--- /dev/null
+++ b/data/mgsm/test_sample.jsonl
@@ -0,0 +1 @@
+{"question": "Sophia is thinking of taking a road trip in her car, and would like to know how far she can drive on a single tank of gas. She has traveled 100 miles since last filling her tank, and she needed to put in 4 gallons of gas to fill it up again. The owner's manual for her car says that her tank holds 12 gallons of gas. \nHow many miles can Sophia drive on a single tank of gas?", "answer": null, "answer_number": 300, "equation_solution": null}
diff --git a/evaluate_math.py b/evaluate_math.py
index 44399da32..189c05a5d 100644
--- a/evaluate_math.py
+++ b/evaluate_math.py
@@ -7,6 +7,7 @@
 parser = ArgumentParser()
 parser.add_argument("--path", type=str, required=True)
 parser.add_argument("--max_line", type=int, default=1000000000000)
+parser.add_argument("--ci_smoke_test", action="store_true")
 args = parser.parse_args()
 
 
@@ -88,3 +89,8 @@ def check_corr(result: str, correct_solution: str, tol: float = 1e-3):
     err_cnts.append(err_cnt)
 print(final_accs)
 print(err_cnts)
+if args.ci_smoke_test:
+    # Exit non-zero explicitly instead of using `assert`, which Python
+    # strips when run with -O; this also covers an empty `final_accs`.
+    if not final_accs or final_accs[0] != 1.0:
+        raise SystemExit(f"CI smoke test failed: final_accs={final_accs}")
diff --git a/requirements.txt b/requirements.txt
index f52faa286..dc4985600 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,5 @@ typing-extensions==4.5.0
 typing-inspect==0.8.0
 colorlog
 rapidfuzz
-spacy
\ No newline at end of file
+spacy
+colorama==0.4.6