Skip to content

Commit

Permalink
[a A] Fix: Facebook crawler excessively hits download endpoint (#6809,…
Browse files Browse the repository at this point in the history
… PR #6819)
  • Loading branch information
achave11-ucsc committed Jan 18, 2025
2 parents 0bb6aab + b83b557 commit 9a48715
Show file tree
Hide file tree
Showing 17 changed files with 328 additions and 177 deletions.
1 change: 0 additions & 1 deletion deployments/anvilbox/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def env() -> Mapping[str, Optional[str]]:
#
'AZUL_DOMAIN_NAME': 'anvil.gi.ucsc.edu',
'AZUL_SUBDOMAIN_TEMPLATE': '*.{AZUL_DEPLOYMENT_STAGE}',
'AZUL_PRIVATE_API': '0',

'AZUL_CATALOGS': json.dumps({
f'{catalog}{suffix}': dict(atlas=atlas,
Expand Down
3 changes: 2 additions & 1 deletion deployments/anvildev/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def env() -> Mapping[str, Optional[str]]:
'AZUL_DEPLOYMENT_STAGE': 'anvildev',

'AZUL_DOMAIN_NAME': 'anvil.gi.ucsc.edu',
'AZUL_PRIVATE_API': '0',

'azul_waf_bot_control': '1',

'AZUL_CATALOGS': json.dumps({
f'{catalog}{suffix}': dict(atlas=atlas,
Expand Down
1 change: 0 additions & 1 deletion deployments/anvilprod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,6 @@ def env() -> Mapping[str, Optional[str]]:
'AZUL_DEPLOYMENT_STAGE': 'anvilprod',

'AZUL_DOMAIN_NAME': 'explore.anvilproject.org',
'AZUL_PRIVATE_API': '0',

'AZUL_CATALOGS': json.dumps({
f'{catalog}{suffix}': dict(atlas=atlas,
Expand Down
2 changes: 2 additions & 0 deletions deployments/dev/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ def env() -> Mapping[str, Optional[str]]:
'AZUL_DOMAIN_NAME': '{AZUL_DEPLOYMENT_STAGE}.singlecell.gi.ucsc.edu',
'AZUL_DRS_DOMAIN_NAME': 'drs.dev.singlecell.gi.ucsc.edu',

'azul_waf_bot_control': '1',

'AZUL_CATALOGS': json.dumps({
f'{catalog}{suffix}': dict(atlas=atlas,
internal=internal,
Expand Down
1 change: 0 additions & 1 deletion deployments/hammerbox/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,6 @@ def env() -> Mapping[str, Optional[str]]:
#
'AZUL_DOMAIN_NAME': 'explore.anvilproject.org',
'AZUL_SUBDOMAIN_TEMPLATE': '*.{AZUL_DEPLOYMENT_STAGE}',
'AZUL_PRIVATE_API': '0',

'AZUL_CATALOGS': json.dumps({
f'{catalog}{suffix}': dict(atlas=atlas,
Expand Down
2 changes: 1 addition & 1 deletion deployments/prod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1445,5 +1445,5 @@ def env() -> Mapping[str, Optional[str]]:
# region, so a weighted average is calculated based on the observed
# number of daily downloads per region.
#
# 'AZUL_FILE_DOWNLOAD_RATE_LIMIT': '59/[email protected]'
# 'azul_waf_download_rate_limit': '59/[email protected]'
}
1 change: 0 additions & 1 deletion deployments/tempdev/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def env() -> Mapping[str, Optional[str]]:
'AZUL_DEPLOYMENT_STAGE': 'tempdev',

'AZUL_DOMAIN_NAME': 'temp.gi.ucsc.edu',
'AZUL_PRIVATE_API': '0',

'AZUL_S3_BUCKET': 'edu-ucsc-gi-platform-temp-dev-storage-{AZUL_DEPLOYMENT_STAGE}.{AWS_DEFAULT_REGION}',

Expand Down
10 changes: 9 additions & 1 deletion environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,5 +946,13 @@ def env() -> Mapping[str, Optional[str]]:
# for restrictions on the supported values for `<limit>` ("Rate limit")
# and `<window>` ("Evaluation window").
#
'AZUL_FILE_DOWNLOAD_RATE_LIMIT': None
'azul_waf_download_rate_limit': None,

# Whether to enable bot control in AWS WAF. Setting this to 1 will enable
# two rules aimed at blocking requests from suspected and verified bots.
# As of January 2024, this will incur a monthly cost of $10 per ACL plus
# $1 per one million requests above ten million requests. The blocking
# only applies to URLs disallowed via robots.txt.
#
'azul_waf_bot_control': '0'
}
10 changes: 5 additions & 5 deletions lambdas/indexer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@
'openapi': '3.0.1',
'info': {
'title': config.indexer_name,
'description': fd('''
This is the internal API for Azul's indexer component.
'''),
# This property should be updated in any PR connected to an issue
# The version property should be updated in any PR connected to an issue
# labeled `API`. Increment the major version for backwards incompatible
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '1.1'
'version': '3.1',
'description': fd('''
This is the internal API for Azul's indexer component.
''')
}
}

Expand Down
90 changes: 53 additions & 37 deletions lambdas/indexer/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
"openapi": "3.0.1",
"info": {
"title": "azul-indexer-dev",
"description": "\nThis is the internal API for Azul's indexer component.\n",
"version": "1.1"
"version": "3.1",
"description": "\nThis is the internal API for Azul's indexer component.\n"
},
"paths": {
"/": {
"get": {
"summary": "A redirect to the Swagger UI for interactive use of this REST API",
"summary": "Redirect to the Swagger UI for interactive use of this REST API",
"tags": [
"Auxiliary"
],
Expand All @@ -22,9 +22,9 @@
}
}
},
"/static/index.html": {
"/swagger/index.html": {
"get": {
"summary": "A Swagger UI for interactive use of this REST API",
"summary": "The Swagger UI for interactive use of this REST API",
"tags": [
"Auxiliary"
],
Expand All @@ -38,7 +38,7 @@
}
}
},
"/static/swagger-initializer.js": {
"/swagger/swagger-initializer.js": {
"get": {
"summary": "Used internally by the Swagger UI",
"tags": [
Expand All @@ -54,7 +54,37 @@
}
}
},
"/openapi": {
"/swagger/{file}": {
"parameters": [
{
"name": "file",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "The name of a static file to be returned"
}
],
"get": {
"summary": "Static files needed for the Swagger UI",
"tags": [
"Auxiliary"
],
"responses": {
"200": {
"description": "The response body is the contents of the requested file"
},
"404": {
"description": "The requested file does not exist"
},
"504": {
"description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n"
}
}
}
},
"/openapi.json": {
"get": {
"summary": "Return OpenAPI specifications for this REST API",
"description": "\nThis endpoint returns the [OpenAPI specifications]'\n(https://github.com/OAI/OpenAPI-Specification) for this REST\nAPI. These are the specifications used to generate the page\nyou are visiting now.\n",
Expand Down Expand Up @@ -117,36 +147,6 @@
]
}
},
"/static/{file}": {
"parameters": [
{
"name": "file",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "The name of a static file to be returned"
}
],
"get": {
"summary": "Static files needed for the Swagger UI",
"tags": [
"Auxiliary"
],
"responses": {
"200": {
"description": "The response body is the contents of the requested file"
},
"404": {
"description": "The requested file does not exist"
},
"504": {
"description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n"
}
}
}
},
"/version": {
"get": {
"summary": "Describe current version of this REST API",
Expand Down Expand Up @@ -225,6 +225,22 @@
}
}
},
"/robots.txt": {
"get": {
"summary": "Robots Exclusion Protocol",
"tags": [
"Auxiliary"
],
"responses": {
"200": {
"description": "\nThe robots.txt resource according to\n[RFC9309](https://datatracker.ietf.org/doc/html/rfc9309)\n"
},
"504": {
"description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n"
}
}
}
},
"/health": {
"get": {
"summary": "Complete health check",
Expand Down
14 changes: 7 additions & 7 deletions lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@
'openapi': '3.0.1',
'info': {
'title': config.service_name,
# The version property should be updated in any PR connected to an issue
# labeled `API`. Increment the major version for backwards incompatible
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '12.1',
'description': fd(f'''
# Overview
Expand Down Expand Up @@ -222,13 +228,7 @@
Also notice that there is only one file. When querying a particular
index, the corresponding entity will always be a singleton like
this.
'''),
# This property should be updated in any PR connected to an issue
# labeled `API`. Increment the major version for backwards incompatible
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '10.1'
''')
},
'tags': [
{
Expand Down
Loading

0 comments on commit 9a48715

Please sign in to comment.