Skip to content

Commit

Permalink
Merge pull request #118 from mendableai/feat/test-suite
Browse files Browse the repository at this point in the history
[Test] Added integration tests suite
  • Loading branch information
nickscamara authored May 8, 2024
2 parents 6956e50 + 0fae15a commit 4a5f876
Show file tree
Hide file tree
Showing 19 changed files with 3,420 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/fly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
needs: pre-deploy
needs: pre-deploy-test-suite
steps:
- uses: actions/checkout@v3
- name: Change directory
Expand Down
62 changes: 62 additions & 0 deletions .github/workflows/test_suite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Test Suite workflow: starts the API and its workers next to a Redis service
# container, then runs the end-to-end tests located in apps/test-suite.
name: Test Suite
# Triggered only by pushes to the main branch.
on:
push:
branches:
- main

# Environment variables, each populated from the GitHub Actions secret of the same name.
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
PORT: ${{ secrets.PORT }}
REDIS_URL: ${{ secrets.REDIS_URL }}
SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}


jobs:
pre-deploy:
name: Pre-deploy checks
runs-on: ubuntu-latest
# Redis service container with container port 6379 mapped to port 6379 on the runner.
services:
redis:
image: redis
ports:
- 6379:6379
steps:
- uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: "20"
- name: Install pnpm
run: npm install -g pnpm
# First install step: dependencies of the API package (apps/api).
- name: Install dependencies
run: pnpm install
working-directory: ./apps/api
# The trailing `&` runs the server in the background so the step returns and later steps can run.
# NOTE(review): no step waits for the server to become ready before the tests start — confirm the test suite tolerates this.
- name: Start the application
run: npm start &
working-directory: ./apps/api
id: start_app
# Workers are started in the background the same way.
- name: Start workers
run: npm run workers &
working-directory: ./apps/api
id: start_workers
# Second install step (same step name as above): dependencies of the test suite package (apps/test-suite).
- name: Install dependencies
run: pnpm install
working-directory: ./apps/test-suite
# Runs the test suite's `test` script from apps/test-suite.
- name: Run E2E tests
run: |
npm run test
working-directory: ./apps/test-suite
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ dump.rdb
apps/js-sdk/node_modules/

apps/api/.env.local

apps/test-suite/node_modules/


apps/test-suite/.env
apps/test-suite/logs
2 changes: 1 addition & 1 deletion apps/api/src/controllers/auth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export async function supaAuthenticateUser(
req.socket.remoteAddress) as string;
const iptoken = incomingIP + token;
await getRateLimiter(
token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode
token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode, token
).consume(iptoken);
} catch (rateLimiterRes) {
console.error(rateLimiterRes);
Expand Down
6 changes: 5 additions & 1 deletion apps/api/src/services/rate-limiter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ export function crawlRateLimit(plan: string){



export function getRateLimiter(mode: RateLimiterMode){
export function getRateLimiter(mode: RateLimiterMode, token: string){
// Special case for the test suite: any token containing this fragment always gets crawlStatusRateLimiter, regardless of mode. TODO: Change this later.
if(token.includes("5089cefa58")){
return crawlStatusRateLimiter;
}
switch(mode) {
case RateLimiterMode.Preview:
return previewRateLimiter;
Expand Down
5 changes: 5 additions & 0 deletions apps/test-suite/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
OPENAI_API_KEY=
TEST_API_KEY=
TEST_URL=http://localhost:3002
ANTHROPIC_API_KEY=
ENV=
43 changes: 43 additions & 0 deletions apps/test-suite/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Test Suite for Firecrawl

This document provides an overview of the test suite for the Firecrawl project. It includes instructions on how to run the tests and interpret the results.

## Overview

The test suite is designed to ensure the reliability and performance of the Firecrawl system. It includes a series of automated tests that check various functionalities and performance metrics.

## Running the Tests

To run the tests, navigate to the `apps/test-suite` directory and execute the following commands:

```bash
npm install
npx playwright install
npm run test
```

## Test Results

The tests are designed to cover various aspects of the system, including:

- Crawling accuracy
- Response time
- Error handling

### Example Test Case

- **Test Name**: Accuracy Test
- **Description**: This test checks the accuracy of the scraping mechanism with 100 pages and a fuzzy threshold of 0.8.
- **Expected Result**: Accuracy >= 0.9
- **Received Result**: Accuracy between 0.2 and 0.3

## Troubleshooting

If you encounter any failures or unexpected results, please check the following:
- Ensure your network connection is stable.
- Verify that all dependencies are correctly installed.
- Review the error logs for any specific error messages.

## Contributing

Contributions to the test suite are welcome. Please refer to the project's main [CONTRIBUTING.md](../../CONTRIBUTING.md) file for guidelines on how to contribute.
113 changes: 113 additions & 0 deletions apps/test-suite/data/websites.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
[
{
"website": "https://www.anthropic.com/claude",
"prompt": "Does this website contain pricing information?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/pricing",
"prompt": "Does this website contain pricing information?",
"expected_output": "yes"
},
{
"website": "https://openai.com/news",
"prompt": "Does this website contain a list of research news?",
"expected_output": "yes"
},
{
"website": "https://agentops.ai",
"prompt": "Does this website contain a code snippets?",
"expected_output": "yes"
},
{
"website": "https://ycombinator.com/companies",
"prompt": "Does this website contain a list bigger than 5 of ycombinator companies?",
"expected_output": "yes"
},
{
"website": "https://firecrawl.dev",
"prompt": "Does this website contain a list bigger than 5 of ycombinator companies?",
"expected_output": "no"
},
{
"website": "https://en.wikipedia.org/wiki/T._N._Seshan",
"prompt": "Does this website talk about Seshan's career?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/blog",
"prompt": "Does this website contain multiple blog articles?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/blog",
"prompt": "Does this website contain multiple blog articles?",
"expected_output": "yes"
},
{
"website": "https://news.ycombinator.com/",
"prompt": "Does this website contain a list of articles in a table markdown format?",
"expected_output": "yes"
},
{
"website": "https://www.vellum.ai/llm-leaderboard",
"prompt": "Does this website contain a model comparison table?",
"expected_output": "yes"
},
{
"website": "https://www.bigbadtoystore.com",
"prompt": "are there more than 3 toys in the new arrivals section?",
"expected_output": "yes"
},
{
"website": "https://www.instructables.com",
"prompt": "Does the site offer more than 5 links about circuits?",
"expected_output": "yes"
},
{
"website": "https://www.powells.com",
"prompt": "is there at least 10 books webpage links?",
"expected_output": "yes"
},
{
"website": "https://www.royalacademy.org.uk",
"prompt": "is there information on upcoming art exhibitions?",
"expected_output": "yes"
},
{
"website": "https://www.eastbaytimes.com",
"prompt": "Is there a Trending Nationally section that lists articles?",
"expected_output": "yes"
},
{
"website": "https://www.manchestereveningnews.co.uk",
"prompt": "is the content focused on Manchester sports news?",
"expected_output": "no"
},
{
"website": "https://physicsworld.com",
"prompt": "does the site provide at least 15 updates on the latest physics research?",
"expected_output": "yes"
},
{
"website": "https://richmondconfidential.org",
"prompt": "does the page contains more than 4 articles?",
"expected_output": "yes"
},
{
"website": "https://www.techinasia.com",
"prompt": "is there at least 10 articles of the startup scene in Asia?",
"expected_output": "yes",
"notes": "The website has a paywall and bot detectors."
},
{
"website": "https://www.boardgamegeek.com",
"prompt": "are there more than 5 board game news?",
"expected_output": "yes"
},
{
"website": "https://www.mountainproject.com",
"prompt": "Are there more than 3 climbing guides for Arizona?",
"expected_output": "yes"
}
]
Loading

0 comments on commit 4a5f876

Please sign in to comment.