Merge pull request #118 from mendableai/feat/test-suite

[Test] Added integration tests suite
mendableai · May 8, 2024 · 4a5f876 · 4a5f876
2 parents 6956e50 + 0fae15a
commit 4a5f876
Show file tree

Hide file tree

Showing 19 changed files with 3,420 additions and 3 deletions.
diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml
@@ -61,7 +61,7 @@ jobs:
   deploy:
     name: Deploy app
     runs-on: ubuntu-latest
-    needs: pre-deploy
+    needs: pre-deploy-test-suite
     steps:
       - uses: actions/checkout@v3
       - name: Change directory

diff --git a/.github/workflows/test_suite.yml b/.github/workflows/test_suite.yml
@@ -0,0 +1,62 @@
+name: Test Suite
+on:
+  push:
+    branches:
+      - main
+
+env:
+  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
+  FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
+  HOST: ${{ secrets.HOST }}
+  LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
+  LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
+  POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+  POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
+  NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
+  PORT: ${{ secrets.PORT }}
+  REDIS_URL: ${{ secrets.REDIS_URL }}
+  SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
+  SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
+  SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
+  SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
+  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+
+
+jobs:
+  pre-deploy:
+    name: Pre-deploy checks
+    runs-on: ubuntu-latest
+    services:
+      redis:
+        image: redis
+        ports:
+          - 6379:6379
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: "20"
+      - name: Install pnpm
+        run: npm install -g pnpm
+      - name: Install dependencies
+        run: pnpm install
+        working-directory: ./apps/api
+      - name: Start the application
+        run: npm start &
+        working-directory: ./apps/api
+        id: start_app
+      - name: Start workers
+        run: npm run workers &
+        working-directory: ./apps/api
+        id: start_workers
+      - name: Install dependencies
+        run: pnpm install
+        working-directory: ./apps/test-suite
+      - name: Run E2E tests
+        run: |
+          npm run test
+        working-directory: ./apps/test-suite
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,9 @@ dump.rdb
 apps/js-sdk/node_modules/
 
 apps/api/.env.local
+
+apps/test-suite/node_modules/
+
+
+apps/test-suite/.env
+apps/test-suite/logs
diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts
@@ -38,7 +38,7 @@ export async function supaAuthenticateUser(
       req.socket.remoteAddress) as string;
     const iptoken = incomingIP + token;
     await getRateLimiter(
-      token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode
+      token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode, token
     ).consume(iptoken);
   } catch (rateLimiterRes) {
     console.error(rateLimiterRes);

diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts
@@ -69,7 +69,11 @@ export function crawlRateLimit(plan: string){
 
 
 
-export function getRateLimiter(mode: RateLimiterMode){
+export function getRateLimiter(mode: RateLimiterMode, token: string){
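+  // Special case for the test suite: any token containing this hard-coded fragment
+  // is given crawlStatusRateLimiter regardless of `mode`. TODO: replace this
+  // hard-coded token check with a configurable mechanism.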
+  if(token.includes("5089cefa58")){
+    return crawlStatusRateLimiter;
+  }
   switch(mode) {
     case RateLimiterMode.Preview:
       return previewRateLimiter;

diff --git a/apps/test-suite/.env.example b/apps/test-suite/.env.example
@@ -0,0 +1,5 @@
+OPENAI_API_KEY=
+TEST_API_KEY=
+TEST_URL=http://localhost:3002
+ANTHROPIC_API_KEY=
+ENV=
diff --git a/apps/test-suite/README.md b/apps/test-suite/README.md
@@ -0,0 +1,43 @@
+# Test Suite for Firecrawl
+
+This document provides an overview of the test suite for the Firecrawl project. It includes instructions on how to run the tests and interpret the results.
+
+## Overview
+
+The test suite is designed to ensure the reliability and performance of the Firecrawl system. It includes a series of automated tests that check various functionalities and performance metrics.
+
+## Running the Tests
+
+To run the tests, navigate to the `apps/test-suite` directory and execute the following commands:
+
+```bash
+npm install
+npx playwright install
+npm run test
+```
+
+## Test Results
+
+The tests are designed to cover various aspects of the system, including:
+
+- Crawling accuracy
+- Response time
+- Error handling
+
+### Example Test Case
+
+- **Test Name**: Accuracy Test
+- **Description**: This test checks the accuracy of the scraping mechanism with 100 pages and a fuzzy threshold of 0.8.
+- **Expected Result**: Accuracy >= 0.9
+- **Received Result**: Accuracy between 0.2 and 0.3
+
+## Troubleshooting
+
+If you encounter any failures or unexpected results, please check the following:
+- Ensure your network connection is stable.
+- Verify that all dependencies are correctly installed.
+- Review the error logs for any specific error messages.
+
+## Contributing
+
+Contributions to the test suite are welcome. Please refer to the project's main [CONTRIBUTING.md](../../CONTRIBUTING.md) file for guidelines on how to contribute.
diff --git a/apps/test-suite/data/websites.json b/apps/test-suite/data/websites.json
@@ -0,0 +1,113 @@
+[
+  {
+    "website": "https://www.anthropic.com/claude",
+    "prompt": "Does this website contain pricing information?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://mendable.ai/pricing",
+    "prompt": "Does this website contain pricing information?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://openai.com/news",
+    "prompt": "Does this website contain a list of research news?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://agentops.ai",
+    "prompt": "Does this website contain code snippets?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://ycombinator.com/companies",
+    "prompt": "Does this website contain a list of more than 5 Y Combinator companies?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://firecrawl.dev",
+    "prompt": "Does this website contain a list of more than 5 Y Combinator companies?",
+    "expected_output": "no"
+  },
+  {
+    "website": "https://en.wikipedia.org/wiki/T._N._Seshan",
+    "prompt": "Does this website talk about Seshan's career?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://mendable.ai/blog",
+    "prompt": "Does this website contain multiple blog articles?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://mendable.ai/blog",
+    "prompt": "Does this website contain multiple blog articles?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://news.ycombinator.com/",
+    "prompt": "Does this website contain a list of articles in a table markdown format?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.vellum.ai/llm-leaderboard",
+    "prompt": "Does this website contain a model comparison table?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.bigbadtoystore.com",
+    "prompt": "are there more than 3 toys in the new arrivals section?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.instructables.com",
+    "prompt": "Does the site offer more than 5 links about circuits?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.powells.com",
+    "prompt": "are there at least 10 book webpage links?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.royalacademy.org.uk",
+    "prompt": "is there information on upcoming art exhibitions?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.eastbaytimes.com",
+    "prompt": "Is there a Trending Nationally section that lists articles?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.manchestereveningnews.co.uk",
+    "prompt": "is the content focused on Manchester sports news?",
+    "expected_output": "no"
+  },
+  {
+    "website": "https://physicsworld.com",
+    "prompt": "does the site provide at least 15 updates on the latest physics research?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://richmondconfidential.org",
+    "prompt": "does the page contain more than 4 articles?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.techinasia.com",
+    "prompt": "are there at least 10 articles about the startup scene in Asia?",
+    "expected_output": "yes",
+    "notes": "The website has a paywall and bot detectors."
+  },
+  {
+    "website": "https://www.boardgamegeek.com",
+    "prompt": "are there more than 5 board game news items?",
+    "expected_output": "yes"
+  },
+  {
+    "website": "https://www.mountainproject.com",
+    "prompt": "Are there more than 3 climbing guides for Arizona?",
+    "expected_output": "yes"
+  }
+]