Skip to content

Commit

Permalink
Merge pull request #36 from mendableai/nsc/rate-limit-fixes
Browse files Browse the repository at this point in the history
Rate limit fixes for crawl status
  • Loading branch information
nickscamara authored Apr 20, 2024
2 parents 39dca60 + 5b3c75b commit d4e7774
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 13 deletions.
18 changes: 10 additions & 8 deletions apps/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { WebScraperDataProvider } from "./scraper/WebScraper";
import { billTeam, checkTeamCredits } from "./services/billing/credit_billing";
import { getRateLimiter, redisClient } from "./services/rate-limiter";
import { parseApi } from "./lib/parseApi";
import { RateLimiterMode } from "./types";

const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter");
Expand Down Expand Up @@ -46,7 +47,7 @@ app.get("/test", async (req, res) => {
res.send("Hello, world!");
});

async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> {
async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> {
const authHeader = req.headers.authorization;
if (!authHeader) {
return { success: false, error: "Unauthorized", status: 401 };
Expand All @@ -56,19 +57,20 @@ async function authenticateUser(req, res, mode?: string): Promise<{ success: boo
return { success: false, error: "Unauthorized: Token missing", status: 401 };
}



try {
const incomingIP = (req.headers["x-forwarded-for"] ||
req.socket.remoteAddress) as string;
const iptoken = incomingIP + token;
await getRateLimiter(
token === "this_is_just_a_preview_token" ? true : false
await getRateLimiter((token === "this_is_just_a_preview_token") ? RateLimiterMode.Preview : mode
).consume(iptoken);
} catch (rateLimiterRes) {
console.error(rateLimiterRes);
return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 };
}

if (token === "this_is_just_a_preview_token" && mode === "scrape") {
if (token === "this_is_just_a_preview_token" && (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview)) {
return { success: true, team_id: "preview" };
}

Expand All @@ -88,7 +90,7 @@ async function authenticateUser(req, res, mode?: string): Promise<{ success: boo
app.post("/v0/scrape", async (req, res) => {
try {
// make sure to authenticate user first, Bearer <token>
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Scrape);
if (!success) {
return res.status(status).json({ error });
}
Expand Down Expand Up @@ -164,7 +166,7 @@ app.post("/v0/scrape", async (req, res) => {

app.post("/v0/crawl", async (req, res) => {
try {
const { success, team_id, error, status } = await authenticateUser(req, res, "crawl");
const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Crawl);
if (!success) {
return res.status(status).json({ error });
}
Expand Down Expand Up @@ -230,7 +232,7 @@ app.post("/v0/crawl", async (req, res) => {
});
app.post("/v0/crawlWebsitePreview", async (req, res) => {
try {
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Preview);
if (!success) {
return res.status(status).json({ error });
}
Expand Down Expand Up @@ -259,7 +261,7 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => {

app.get("/v0/crawl/status/:jobId", async (req, res) => {
try {
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.CrawlStatus);
if (!success) {
return res.status(status).json({ error });
}
Expand Down
26 changes: 21 additions & 5 deletions apps/api/src/services/rate-limiter.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { RateLimiterRedis } from "rate-limiter-flexible";
import * as redis from "redis";
import { RateLimiterMode } from "../../src/types";

const MAX_REQUESTS_PER_MINUTE_PREVIEW = 5;
const MAX_CRAWLS_PER_MINUTE_STARTER = 2;
Expand All @@ -8,6 +9,9 @@ const MAX_CRAWLS_PER_MINUTE_SCALE = 20;

const MAX_REQUESTS_PER_MINUTE_ACCOUNT = 20;

// Point budget for the crawl-status rate limiter, which is configured with a
// 60-second window.
const MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS = 120;




export const redisClient = redis.createClient({
Expand All @@ -29,6 +33,13 @@ export const serverRateLimiter = new RateLimiterRedis({
duration: 60, // Duration in seconds
});

/**
 * Rate limiter applied to crawl-status polling requests.
 *
 * Allows MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS points per key in each
 * 60-second window.
 */
export const crawlStatusRateLimiter = new RateLimiterRedis({
  storeClient: redisClient,
  // Dedicated prefix: keeps these counters separate from any other limiter
  // that stores its keys through the same Redis client, so status polling
  // cannot consume (or be throttled by) another limiter's budget.
  keyPrefix: "crawl-status",
  points: MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS,
  duration: 60, // Duration in seconds
});


export function crawlRateLimit(plan: string){
if(plan === "standard"){
Expand Down Expand Up @@ -56,10 +67,15 @@ export function crawlRateLimit(plan: string){
}


export function getRateLimiter(preview: boolean){
if(preview){
return previewRateLimiter;
}else{
return serverRateLimiter;


/**
 * Selects the rate limiter that applies to a request.
 *
 * @param mode - Kind of request being limited. Optional, because callers may
 *   forward a mode that was never supplied.
 * @returns `previewRateLimiter` for preview requests,
 *   `crawlStatusRateLimiter` for crawl-status requests, and
 *   `serverRateLimiter` for every other mode or when no mode is given.
 */
export function getRateLimiter(mode?: RateLimiterMode) {
  switch (mode) {
    case RateLimiterMode.Preview:
      return previewRateLimiter;
    case RateLimiterMode.CrawlStatus:
      return crawlStatusRateLimiter;
    // Crawl, Scrape and an omitted mode all fall back to the server limiter.
    default:
      return serverRateLimiter;
  }
}
8 changes: 8 additions & 0 deletions apps/api/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,11 @@ export interface WebScraperOptions {



/**
 * Kind of request being rate limited; used to pick which rate limiter a
 * request is counted against.
 */
export enum RateLimiterMode {
// Starting a crawl.
Crawl = "crawl",
// Polling the status of an existing crawl job.
CrawlStatus = "crawl-status",
// Scraping a single page.
Scrape = "scrape",
// Preview requests (e.g. made with the preview token).
Preview = "preview",
}


0 comments on commit d4e7774

Please sign in to comment.