feat: OverFast API v3 (#208)
* feat: refactored entire project and caching system

* feat: added dynamic ttl display on documentation. added player cache expiration

* feat: renamed parsers

* feat: refactored validation error handler

* fix: ignoring SLF001 on tests

* feat: final tweaks and README update

* fix: updated README diagrams

* fix: fixes after review and first test PR

* fix: fixed last typing issue
TeKrop authored Nov 3, 2024
1 parent 2977674 commit 7d24e14
Showing 174 changed files with 2,542 additions and 3,635 deletions.
17 changes: 6 additions & 11 deletions .env.dist
@@ -5,31 +5,26 @@ APP_PORT=80
# Application settings
APP_BASE_URL=https://overfast-api.tekrop.fr
LOG_LEVEL=info
MAX_CONCURRENT_REQUESTS=5
STATUS_PAGE_URL=

# Rate limiting
BLIZZARD_RATE_LIMIT_RETRY_AFTER=5
RATE_LIMIT_PER_SECOND_PER_IP=10
RATE_LIMIT_PER_IP_BURST=2
MAX_CONNECTIONS_PER_IP=5
RATE_LIMIT_PER_SECOND_PER_IP=30
RATE_LIMIT_PER_IP_BURST=5
MAX_CONNECTIONS_PER_IP=10

# Redis
REDIS_CACHING_ENABLED=true
REDIS_HOST=redis
REDIS_PORT=6379

# Cache configuration
BACKGROUND_CACHE_REFRESH_ENABLED=true
EXPIRED_CACHE_REFRESH_LIMIT=3600
PLAYER_CACHE_TIMEOUT=259200
HEROES_PATH_CACHE_TIMEOUT=86400
HERO_PATH_CACHE_TIMEOUT=86400
CSV_CACHE_TIMEOUT=86400
CAREER_PATH_CACHE_TIMEOUT=7200
SEARCH_ACCOUNT_PATH_CACHE_TIMEOUT=3600
CAREER_PATH_CACHE_TIMEOUT=3600
SEARCH_ACCOUNT_PATH_CACHE_TIMEOUT=600
SEARCH_DATA_TIMEOUT=7200
CAREER_PARSER_CACHE_EXPIRATION_TIMEOUT=604800
PARSER_CACHE_EXPIRATION_SPREADING_PERCENTAGE=25

# Critical error Discord webhook
DISCORD_WEBHOOK_ENABLED=false
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_language_version:
python: python3.12
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
rev: v0.7.2
hooks:
- id: ruff
name: (ruff) Linting and fixing code
8 changes: 4 additions & 4 deletions CONTRIBUTING.md
@@ -9,22 +9,22 @@ As of now, only some specific stuff can easily be updated by anyone, even withou
- The CSV file containing the list of maps of the game

## 🦸 Heroes data
The CSV file containing heroes statistics data is located in `app/data/heroes.csv`. Data is divided into 6 columns :
The CSV file containing heroes statistics data is located in `app/heroes/data/heroes.csv`. Data is divided into 6 columns :
- `key` : Key of the hero name, used in URLs of the API (and by Blizzard for their pages)
- `name` : Display name of the hero (with the right accentuation). Used in the documentation.
- `role` : Role key of the hero, which is either `damage`, `support` or `tank`
- `health` : Health of the hero
- `health` : Health of the hero (in Role Queue)
- `armor` : Armor of the hero, mainly possessed by tanks
- `shields` : Shields of the hero
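The 6-column layout above can be loaded with Python's standard `csv` module. The excerpt below is hypothetical sample data following the documented columns, not authoritative game values:

```python
import csv
import io

# Hypothetical two-row excerpt following the 6-column layout described above;
# the stat values are illustrative, not authoritative game data.
HEROES_CSV_EXCERPT = """\
key,name,role,health,armor,shields
ana,Ana,support,250,0,0
reinhardt,Reinhardt,tank,350,300,0
"""

with io.StringIO(HEROES_CSV_EXCERPT) as csv_file:
    heroes = list(csv.DictReader(csv_file))

# Every row exposes the documented columns as string values
tank_keys = [hero["key"] for hero in heroes if hero["role"] == "tank"]
```

The same `csv.DictReader` pattern applies to the gamemodes and maps files below, only with different column names.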

## 🎲 Gamemodes list
The CSV file containing gamemodes list is located in `app/data/gamemodes.csv`. Data is divided into 3 columns :
The CSV file containing gamemodes list is located in `app/gamemodes/data/gamemodes.csv`. Data is divided into 3 columns :
- `key` : Key of the gamemode, used in URLs of the API, and for the name of the corresponding screenshot and icon files
- `name` : Name of the gamemode (in english)
- `description` : Description of the gamemode (in english)

## 🗺️ Maps list
The CSV file containing maps list is located in `app/data/maps.csv`. Data is divided into 5 columns :
The CSV file containing maps list is located in `app/maps/data/maps.csv`. Data is divided into 5 columns :
- `key` : Key of the map, used in URLs of the API, and for the name of the corresponding screenshot file
- `name` : Name of the map (in english)
- `gamemodes` : List of gamemodes in which the map is playable by default
29 changes: 16 additions & 13 deletions Makefile
@@ -8,7 +8,9 @@ CYAN := \033[1;36m
RESET := \033[0m

# Aliases
DOCKER_RUN := docker compose run \
DOCKER_COMPOSE := docker compose

DOCKER_RUN := $(DOCKER_COMPOSE) run \
--volume ${PWD}/app:/code/app \
--volume ${PWD}/tests:/code/tests \
--volume ${PWD}/htmlcov:/code/htmlcov \
@@ -25,11 +27,16 @@ help: ## Show this help message

build: ## Build project images
@echo "Building OverFastAPI (dev mode)..."
BUILD_TARGET="dev" docker compose build
BUILD_TARGET="dev" $(DOCKER_COMPOSE) build

start: ## Run OverFastAPI application (dev mode)
@echo "Launching OverFastAPI (dev mode)..."
start: ## Run OverFastAPI application (dev or testing mode)
ifdef TESTING_MODE
@echo "Launching OverFastAPI (testing mode with reverse proxy)..."
$(DOCKER_COMPOSE) --profile testing up -d
else
@echo "Launching OverFastAPI (dev mode with autoreload)..."
$(DOCKER_RUN) uv run fastapi dev app/main.py --host 0.0.0.0
endif

lint: ## Run linter
@echo "Running linter..."
@@ -53,24 +60,20 @@ ifdef PYTEST_ARGS
$(DOCKER_RUN) uv run python -m pytest $(PYTEST_ARGS)
else
@echo "Running all tests with coverage..."
$(DOCKER_RUN) uv run python -m pytest --cov app --cov-report html -n auto
$(DOCKER_RUN) uv run python -m pytest --cov app/ --cov-report html -n auto tests/
endif

up: ## Build & run OverFastAPI application (production mode)
@echo "Building OverFastAPI (production mode)..."
docker compose build
$(DOCKER_COMPOSE) build
@echo "Stopping OverFastAPI and cleaning containers..."
docker compose down -v --remove-orphans
$(DOCKER_COMPOSE) down -v --remove-orphans
@echo "Launching OverFastAPI (production mode)..."
docker compose up -d
$(DOCKER_COMPOSE) up -d

down: ## Stop the app and remove containers
@echo "Stopping OverFastAPI and cleaning containers..."
docker compose --profile "*" down -v --remove-orphans

up-testing: ## Run OverFastAPI application (testing mode)
@echo "Launching OverFastAPI (testing mode)..."
docker compose --profile testing up -d
$(DOCKER_COMPOSE) --profile "*" down -v --remove-orphans

clean: down ## Clean up Docker environment
@echo "Cleaning Docker environment..."
86 changes: 57 additions & 29 deletions README.md
@@ -23,11 +23,11 @@


## [Live instance](https://overfast-api.tekrop.fr)
The live instance is restricted with a rate limit around 10 req/s per IP (a shared limit across all endpoints). This limit may be adjusted as needed. If you require higher throughput, consider hosting your own instance on a server 👍
The live instance operates with a rate limit applied per second, shared across all endpoints. You can view the current rate limit on the home page, and this limit may be adjusted as needed. For higher request throughput, consider hosting your own instance on a dedicated server 👍
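The per-IP limit with a small burst allowance (see `RATE_LIMIT_PER_SECOND_PER_IP` and `RATE_LIMIT_PER_IP_BURST` in `.env.dist`) is enforced by the nginx reverse proxy, not by application code. As a rough illustration only, the semantics resemble a token bucket; everything below is a hypothetical sketch, not part of the project:

```python
import time

class TokenBucket:
    """Illustrative token bucket mirroring a rate of 30 req/s per IP with a
    burst of 5. The real limiting is done by nginx; this class is only a
    hypothetical sketch of the semantics."""

    def __init__(self, rate_per_second: int, burst: int) -> None:
        self.rate = rate_per_second
        self.capacity = rate_per_second + burst  # short bursts above the base rate
        self.tokens = float(self.capacity)
        self.last_refill = time.monotonic()

    def allow(self) -> bool:
        now = time.monotonic()
        elapsed = now - self.last_refill
        # Refill tokens proportionally to elapsed time, capped at capacity
        self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
        self.last_refill = now
        if self.tokens >= 1.0:
            self.tokens -= 1.0
            return True
        return False  # over the limit: the proxy would answer HTTP 429
```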

- Live instance (Redoc documentation) : https://overfast-api.tekrop.fr/
- Swagger UI : https://overfast-api.tekrop.fr/docs
- Status page : https://stats.uptimerobot.com/E0k0yU1pJQ
- Status page : https://uptime-overfast-api.tekrop.fr/

## 🐋 Run for production
Running the project is straightforward. Ensure you have `docker` and `docker compose` installed. Next, generate a `.env` file using the provided `.env.dist` template. Finally, execute the following command:
@@ -42,11 +42,11 @@ Same as earlier, ensure you have `docker` and `docker compose` installed, and ge
Then, execute the following commands to launch the dev server :

```shell
make build # Build the images, needed for all further commands
make start # Launch OverFast API (dev mode)
make testing # Launch OverFast API (testing mode with reverse proxy)
make build # Build the images, needed for all further commands
make start # Launch OverFast API (dev mode with autoreload)
make start TESTING_MODE=true # Launch OverFast API (testing mode, with reverse proxy)
```
The dev server will be running on the port `8000`. You can use the `make down` command to stop and remove the containers. Feel free to type `make` or `make help` to access a comprehensive list of all available commands for your reference.
The dev server will be running on port `8000`. In testing mode, the reverse proxy will be running on port `8080`. You can use the `make down` command to stop and remove the containers. Feel free to type `make` or `make help` to access a comprehensive list of all available commands for your reference.

### Generic settings
Should you wish to customize according to your specific requirements, here is a detailed list of available settings:
@@ -73,7 +73,7 @@ Running tests with coverage (default)
make test
```

Running tests with given args
Running tests with given args (without coverage)
```shell
make test PYTEST_ARGS="tests/common"
```
@@ -94,46 +94,68 @@ In player career statistics, various conversions are applied for ease of use:
- **Percent values** are represented as **integers**, omitting the percent symbol
- Integer and float string representations are converted to their respective types

### API Cache and Parser Cache
### Redis caching

OverFast API integrates a **Redis**-based cache system, divided into two main components:
OverFast API integrates a **Redis**-based cache system, divided into three main components:
- **API Cache**: This high-level cache associates URIs (cache keys) with raw JSON data. Upon the initial request, if a cache entry exists, the **nginx** server returns the JSON data directly. Cached values are stored with varying TTL (Time-To-Live) parameters depending on the requested route.
- **Parser Cache**: Specifically designed for the API's parsing system, this cache stores parsing results (JSON objects) from HTML Blizzard pages. Its purpose is to minimize calls to Blizzard servers when requests involve filters. The cached values are refreshed in the background prior to expiration.
- **Player Cache**: Specifically designed for the API's player data endpoints, this cache stores both HTML Blizzard pages (`profile`) and search results (`summary`) for a given player. Its purpose is to minimize calls to Blizzard servers whenever the associated API Cache has expired but the player's career hasn't changed since the last call, determined using the `lastUpdated` value from the `summary`. This cache only expires if not accessed for a given TTL (default is 3 days).
- **Search Data Cache**: Populated from the player search endpoint, this cache stores mappings between `avatar`, `namecard` and `title` URLs and their corresponding IDs. Since only the ID values are accessible on profile pages, this "Search Data" cache is initialized when the app launches.
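The "only expire if not accessed" behaviour of the Player Cache corresponds to access-based (sliding) expiry, which can be implemented by resetting the key's TTL on every read (e.g. with Redis `EXPIRE`). A minimal sketch, with a hypothetical cache client and key naming:

```python
def get_with_idle_ttl(cache, key: str, idle_ttl_seconds: int):
    """Sketch of access-based expiry: reset the TTL on every read so the key
    only expires after idle_ttl_seconds without any access. `cache` is assumed
    to expose `get`/`expire` like a redis-py client; names are hypothetical."""
    value = cache.get(key)
    if value is not None:
        cache.expire(key, idle_ttl_seconds)  # restart the idle countdown
    return value
```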

Here is the list of all TTL values configured for API Cache :
Below is the current list of TTL values configured for the API cache. The latest values are available on the API homepage.
* Heroes list : 1 day
* Hero specific data : 1 day
* Roles list : 1 day
* Gamemodes list : 1 day
* Maps list : 1 day
* Players career : 1 hour
* Players search : 1 hour
* Players search : 10 min
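The API Cache pattern with the TTLs above amounts to a get-or-fetch helper: return cached JSON on a hit, otherwise fetch, store with the route's TTL, and return. The cache interface and key naming below are assumptions (redis-py exposes a compatible `get`/`set(..., ex=ttl)` interface), not the API's actual code:

```python
import json

def get_or_fetch(cache, uri: str, fetch, ttl_seconds: int):
    """Sketch of the API Cache pattern: cached JSON for a URI, or fetch,
    store with the route's TTL, and return. `cache` is assumed to expose
    `get`/`set(..., ex=ttl)` like a redis-py client; key naming is hypothetical."""
    cached = cache.get(f"api-cache:{uri}")
    if cached is not None:
        return json.loads(cached)  # cache hit: no call to Blizzard
    data = fetch()  # cache miss: fetch and parse upstream data
    cache.set(f"api-cache:{uri}", json.dumps(data), ex=ttl_seconds)
    return data
```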

### Refresh-Ahead cache system
## 🐍 Architecture

### Default case

The default case is pretty straightforward. When a `User` makes an API request, `Nginx` first checks `Redis` for cached data :
* If available, `Redis` returns the data directly to `Nginx`, which forwards it to the `User` (cache hit).
* If the cache is empty (cache miss), `Nginx` sends the request to the `App` server, which retrieves and parses data from `Blizzard`.

The `App` then stores this data in `Redis` and returns it to `Nginx`, which sends the response to the `User`. This approach minimizes external requests and speeds up response times by prioritizing cached data.

```mermaid
sequenceDiagram
autonumber
actor User
participant Nginx
participant Redis
participant Worker
participant App
participant Blizzard
Worker->>+Redis: Request expiring Parser Cache
Redis-->>-Worker: Return expiring Parser Cache
alt Some Parser Cache will expire
Worker->>+Blizzard: Request up-to-date data
Blizzard-->>-Worker: Return up-to-date data
Worker->>+Redis: Update cache values
User->>+Nginx: Make an API request
Nginx->>+Redis: Make an API Cache request
alt API Cache is available
Redis-->>Nginx: Return API Cache data
Nginx-->>User: Return API Cache data
else
Redis-->>-Nginx: Return no result
Nginx->>+App: Transmit the request to App server
App->>+Blizzard: Retrieve data
Blizzard-->>-App: Return data
App->>App: Parse HTML page
App->>Redis: Store data into API Cache
App-->>-Nginx: Return API data
Nginx-->>-User: Return API data
end
```

To minimize requests to Blizzard servers, a Refresh-Ahead cache system has been deployed.
### Player profile case

Upon the initial request for a player's career page, there may be a slight delay (approximately 2-3 seconds) as data is fetched from Blizzard. Following this, the computed data is cached in the Parser Cache, which is subsequently refreshed in the background by a dedicated worker, before expiration. Additionally, the final data is stored in the API Cache, which is generated only upon user requests.
The player profile request flow is similar to the previous setup, but with an extra layer of caching for player-specific data, including HTML data (profile page) and player search data (JSON data).

This approach ensures that subsequent requests for the same career page are exceptionally swift, significantly enhancing user experience.
When a `User` makes an API request, `Nginx` checks `Redis` for cached API data. If found, it’s returned directly. If not, `Nginx` forwards the request to the `App` server.

The `App` then retrieves Search data from `Blizzard` and checks the Player Cache in `Redis` :
* If the player data is cached and up-to-date (`lastUpdated` from the Search data has not changed), `App` parses the cached page
* If not, `App` retrieves and parses the data from `Blizzard`, then stores it in both the Player Cache and the API Cache.

## 🐍 Architecture
This additional Player Cache layer reduces external calls for player-specific data, especially when the player's career hasn't changed, improving performance and response times.
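The Player Cache decision above can be sketched as follows. All function names, the cache structure, and the key layout are hypothetical; only the `lastUpdated` comparison mirrors the flow described in this section:

```python
def get_profile_html(player_id: str, cache: dict, search, fetch_html):
    """Sketch of the Player Cache decision: reuse cached profile HTML when
    `lastUpdated` from the search summary is unchanged. Function and key
    names are hypothetical; only the comparison mirrors the text above."""
    summary = search(player_id)  # always fetch the cheap search summary first
    entry = cache.get(player_id)
    if entry is not None and entry["lastUpdated"] == summary["lastUpdated"]:
        return entry["html"]  # career unchanged: skip the profile page fetch
    html = fetch_html(player_id)  # career changed (or first call)
    cache[player_id] = {"lastUpdated": summary["lastUpdated"], "html": html}
    return html
```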

```mermaid
sequenceDiagram
@@ -142,6 +164,7 @@
participant Nginx
participant Redis
participant App
participant Blizzard
User->>+Nginx: Make an API request
Nginx->>+Redis: Make an API Cache request
alt API Cache is available
@@ -150,21 +173,26 @@
else
Redis-->>-Nginx: Return no result
Nginx->>+App: Transmit the request to App server
App->>+Redis: Make Parser Cache request
alt Parser Cache is available
Redis-->>App: Return Parser Cache
App->>+Blizzard: Make a Player Search request
Blizzard-->>-App: Return Player Search data
App->>+Redis: Make Player Cache request
alt Player Cache is available and up-to-date
Redis-->>App: Return Player Cache
App->>App: Parse HTML page
else
Redis-->>-App: Return no result
App->>+Blizzard: Retrieve Player data (HTML)
Blizzard-->>-App: Return Player data
App->>App: Parse HTML page
App->>Redis: Store data into Player Cache
end
App->>Redis: Store data into API Cache
App-->>-Nginx: Return API data
Nginx-->>-User: Return API data
end
```

Utilizing `docker compose`, this architecture saves response caches into Redis. Subsequent requests are then served directly by nginx without involving the Python server at all. This approach strikes the optimal performance balance, leveraging nginx's efficiency in serving static content. Depending on the configured Blizzard pages, a single request may trigger multiple Parser Cache requests.

## 🤝 Contributing

Contributions, issues and feature requests are welcome ! Do you want to update the heroes data (health, armor, shields, etc.) or the maps list ? Don't hesitate to consult the dedicated [CONTRIBUTING file](https://github.com/TeKrop/overfast-api/blob/main/CONTRIBUTING.md).