diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 000000000..50cda2c13
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,38 @@
+# CI smoke test: runs benchmark.py on data/mgsm/test_sample.jsonl and checks
+# the output with evaluate_math.py on macOS, Windows and Linux runners.
+name: Test
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    # Skip the job when the head commit message contains 'ci skip'.
+    if: "!contains(github.event.head_commit.message, 'ci skip')"
+    strategy:
+      matrix:
+        os: [macos-latest, windows-latest, ubuntu-latest]
+    steps:
+    - name: Cancel previous run
+      uses: styfle/cancel-workflow-action@0.11.0
+      with:
+        access_token: ${{ github.token }}
+    - uses: actions/checkout@v3
+    - name: Set up Python 3.9
+      uses: actions/setup-python@v4
+      with:
+        # Quoted so YAML keeps the version as a string rather than a float.
+        python-version: "3.9"
+    - name: Upgrade pip
+      run: |
+        python -m pip install --upgrade pip setuptools wheel
+    - name: Install dependencies
+      run: |
+        python -m pip install -r requirements.txt
+    - name: Test
+      env:
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        OPENAI_ORGANIZATION: ${{ secrets.OPENAI_ORGANIZATION }}
+      run: |
+        python benchmark.py --task tasksolving/mgsm/gpt-3.5 --dataset_path data/mgsm/test_sample.jsonl --overwrite --output_path ci_smoke_test_output
+        python evaluate_math.py --path ci_smoke_test_output/results.jsonl --ci_smoke_test
diff --git a/data/mgsm/test_sample.jsonl b/data/mgsm/test_sample.jsonl
new file mode 100644
index 000000000..bdd20eb46
--- /dev/null
+++ b/data/mgsm/test_sample.jsonl
@@ -0,0 +1 @@
+{"question": "Sophia is thinking of taking a road trip in her car, and would like to know how far she can drive on a single tank of gas. She has traveled 100 miles since last filling her tank, and she needed to put in 4 gallons of gas to fill it up again. The owner's manual for her car says that her tank holds 12 gallons of gas. How many miles can Sophia drive on a single tank of gas?", "answer": null, "answer_number": 300, "equation_solution": null}
\ No newline at end of file
diff --git a/evaluate_math.py b/evaluate_math.py
index 44399da32..189c05a5d 100644
--- a/evaluate_math.py
+++ b/evaluate_math.py
@@ -7,6 +7,7 @@
 parser = ArgumentParser()
 parser.add_argument("--path", type=str, required=True)
 parser.add_argument("--max_line", type=int, default=1000000000000)
+parser.add_argument("--ci_smoke_test", action="store_true", help="fail unless the first accuracy is 1.0")
 args = parser.parse_args()
 
 
@@ -88,3 +89,6 @@ def check_corr(result: str, correct_solution: str, tol: float = 1e-3):
     err_cnts.append(err_cnt)
 print(final_accs)
 print(err_cnts)
+# CI smoke test gate: exit non-zero explicitly (an assert would be stripped under `python -O`).
+if args.ci_smoke_test and final_accs[0] != 1.0:
+    raise SystemExit(f"CI smoke test failed: accuracy {final_accs[0]} != 1.0")
diff --git a/requirements.txt b/requirements.txt
index f52faa286..dc4985600 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,5 @@ typing-extensions==4.5.0
 typing-inspect==0.8.0
 colorlog
 rapidfuzz
-spacy
\ No newline at end of file
+spacy
+colorama==0.4.6