From 226b41a54f28725f451915a022c26df8ebcad165 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?=
Date: Fri, 11 Aug 2023 10:45:45 +0200
Subject: [PATCH] fix: add robots.txt

---
Note: this revision sets `resp.text` instead of the deprecated `resp.body`
and adds docstrings; the post-image blob id on the `index` line below has
not been recomputed for it.

 robotoff/app/api.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/robotoff/app/api.py b/robotoff/app/api.py
index 465e5c33d1..a8d6ed1854 100644
--- a/robotoff/app/api.py
+++ b/robotoff/app/api.py
@@ -1669,6 +1669,19 @@ def on_get(self, req: falcon.Request, resp: falcon.Response):
         resp.media = response
 
 
+class RobotsTxtResource:
+    """Serve a robots.txt that disallows every path for all user agents."""
+
+    def on_get(self, req: falcon.Request, resp: falcon.Response):
+        """Answer GET with the plain-text robots.txt body and HTTP 200."""
+        # Disallow all crawling: otherwise web crawlers send millions of
+        # requests to Robotoff (420k requests/day by Google alone)
+        # `resp.text` supersedes `resp.body`, deprecated since Falcon 3.0
+        resp.text = "User-agent: *\nDisallow: /\n"
+        resp.content_type = falcon.MEDIA_TEXT
+        resp.status = falcon.HTTP_200
+
+
 cors = CORS(
     allow_all_origins=True,
     allow_all_headers=True,
@@ -1731,3 +1744,4 @@ def on_get(self, req: falcon.Request, resp: falcon.Response):
 api.add_route("/api/v1/users/statistics/{username}", UserStatisticsResource())
 api.add_route("/api/v1/predictions", PredictionCollection())
 api.add_route("/api/v1/annotation/collection", LogoAnnotationCollection())
+api.add_route("/robots.txt", RobotsTxtResource())