From d6279eb7fc060fd68533341ea9cf6faaba721acb Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 14:45:53 -0700 Subject: [PATCH 1/7] Correct and unify the logging for the ingestion server --- ingestion_server/gunicorn.conf.py | 38 ++++++++++++++++++++++++ ingestion_server/gunicorn_worker.conf.py | 36 ++++++++++++++++++++++ ingestion_server/ingestion_server/api.py | 12 -------- 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/ingestion_server/gunicorn.conf.py b/ingestion_server/gunicorn.conf.py index bc15b4447ec..3bd6daa1787 100644 --- a/ingestion_server/gunicorn.conf.py +++ b/ingestion_server/gunicorn.conf.py @@ -1,7 +1,45 @@ bind = ["0.0.0.0:8001"] capture_output = True +accesslog = "-" +errorlog = "-" chdir = "./ingestion_server/" timeout = 120 reload = True +logconfig_dict = { + # NOTE: Most of this is inherited from the default configuration + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "generic": { + "format": "[%(asctime)s - %(name)s - %(lineno)3d][%(levelname)s] %(message)s", # noqa: E501 + }, + }, + "root": {"level": "INFO", "handlers": ["console"]}, + "loggers": { + "gunicorn.error": { + "level": "DEBUG", + "handlers": ["console"], + "propagate": False, # Prevents default handler from also logging this + "qualname": "gunicorn.error", + }, + "gunicorn.access": { + "level": "INFO", + "handlers": ["console"], + "propagate": False, # Prevents default handler from also logging this + "qualname": "gunicorn.access", + }, + "": { + "level": "INFO", + "handlers": ["console"], + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "generic", + "stream": "ext://sys.stdout", + }, + }, +} loglevel = "debug" wsgi_app = "api:api" diff --git a/ingestion_server/gunicorn_worker.conf.py b/ingestion_server/gunicorn_worker.conf.py index df032c58e19..b5befdbab54 100644 --- a/ingestion_server/gunicorn_worker.conf.py +++ b/ingestion_server/gunicorn_worker.conf.py @@ -3,6 +3,42 @@ chdir = "./ingestion_server/" timeout = 120 reload = True +logconfig_dict = { + # NOTE: Most of this is inherited from the default configuration + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "generic": { + "format": "[%(asctime)s - %(name)s - %(lineno)3d][%(levelname)s] %(message)s", # noqa: E501 + }, + }, + "root": {"level": "INFO", "handlers": ["console"]}, + "loggers": { + "gunicorn.error": { + "level": "DEBUG", + "handlers": ["console"], + "propagate": False, # Prevents default handler from also logging this + "qualname": "gunicorn.error", + }, + "gunicorn.access": { + "level": "INFO", + "handlers": ["console"], + "propagate": False, # Prevents default handler from also logging this + "qualname": "gunicorn.access", + }, + "": { + "level": "INFO", + "handlers": ["console"], + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "generic", + "stream": "ext://sys.stdout", + }, + }, +} loglevel = "debug" wsgi_app = "indexer_worker:api" accesslog = "-" diff --git a/ingestion_server/ingestion_server/api.py b/ingestion_server/ingestion_server/api.py index c99f12f664c..54bc5057c20 100644 --- a/ingestion_server/ingestion_server/api.py +++ b/ingestion_server/ingestion_server/api.py @@ -2,7 +2,6 @@ import logging import os -import sys import time import uuid from multiprocessing import Process, Value @@ -326,17 +325,6 @@ def on_delete(_, __): def create_api(log=True): """Create an instance of the Falcon API server.""" - if log: - root = logging.getLogger() - root.setLevel(logging.DEBUG) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.INFO) - formatter = logging.Formatter( - "%(asctime)s %(levelname)s %(filename)s:%(lineno)d - %(message)s" - ) - handler.setFormatter(formatter) - root.addHandler(handler) - _api = falcon.App() task_tracker = TaskTracker() From 6aee6dec4f3fddb784850d46a7b38d235c27ba43 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 15:07:45 -0700 Subject: [PATCH 2/7] Simplify ingestion server command, remove unused docker-compose file --- docker-compose.yml | 1 - ingestion_server/Dockerfile | 1 + ingestion_server/docker-compose.yml | 15 --------------- 3 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 ingestion_server/docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml index e11c6b2b086..05db8447f8d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -240,7 +240,6 @@ services: args: # Automatically inferred from env vars, unless specified - INGESTION_PY_VERSION image: openverse-ingestion_server - command: gunicorn -c ./gunicorn.conf.py ports: - "50281:8001" depends_on: diff --git a/ingestion_server/Dockerfile b/ingestion_server/Dockerfile index 2b0e95b90c8..a5cfb888474 100644 --- a/ingestion_server/Dockerfile +++ b/ingestion_server/Dockerfile @@ -79,3 +79,4 @@ COPY --chown=ingestionu . /ingestion_server/ EXPOSE 8001 8002 # CMD is set from Docker Compose +CMD ["gunicorn", "-c", "./gunicorn.conf.py"] diff --git a/ingestion_server/docker-compose.yml b/ingestion_server/docker-compose.yml deleted file mode 100644 index 8bd64d2cce7..00000000000 --- a/ingestion_server/docker-compose.yml +++ /dev/null @@ -1,15 +0,0 @@ -version: "2.4" -services: - ingestion_server: - # Allow the image tag to be set to a specific version, default to latest - image: ghcr.io/wordpress/openverse-ingestion_server:${IMAGE_TAG:-latest} - ports: - - "8001:8001" - env_file: - - .env - dns: - # Related to DNS management on the production box. - # 172.16.0.0–172.31.255.255 is a reserved IP range by the (IANA) for - # private/internal networking. This specific IP is the DNS resolution address - # for a default AWS Virtual Private Cloud. - - 172.31.0.2 From 7aecaf5145f939f7e67921348e895379dba99b7e Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 15:14:54 -0700 Subject: [PATCH 3/7] Remove unused param --- ingestion_server/ingestion_server/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion_server/ingestion_server/api.py b/ingestion_server/ingestion_server/api.py index 54bc5057c20..240a095b400 100644 --- a/ingestion_server/ingestion_server/api.py +++ b/ingestion_server/ingestion_server/api.py @@ -322,7 +322,7 @@ def on_delete(_, __): clear_state() -def create_api(log=True): +def create_api(): """Create an instance of the Falcon API server.""" _api = falcon.App() From 6fbd212c3ca62926d27ded88901e96d65cc7993e Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 15:17:32 -0700 Subject: [PATCH 4/7] Remove comment --- ingestion_server/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/ingestion_server/Dockerfile b/ingestion_server/Dockerfile index a5cfb888474..47d6de1e5f8 100644 --- a/ingestion_server/Dockerfile +++ b/ingestion_server/Dockerfile @@ -78,5 +78,4 @@ COPY --chown=ingestionu . /ingestion_server/ # - 8002: Gunicorn server for `indexer_worker` Falcon app EXPOSE 8001 8002 -# CMD is set from Docker Compose CMD ["gunicorn", "-c", "./gunicorn.conf.py"] From a1273049f426e869021faec5241d6dade8d616ef Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 15:35:53 -0700 Subject: [PATCH 5/7] Remove logging config from indexer worker --- ingestion_server/ingestion_server/indexer_worker.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ingestion_server/ingestion_server/indexer_worker.py b/ingestion_server/ingestion_server/indexer_worker.py index d6631b01ba7..3b280ffa4cb 100644 --- a/ingestion_server/ingestion_server/indexer_worker.py +++ b/ingestion_server/ingestion_server/indexer_worker.py @@ -6,7 +6,6 @@ """ import logging as log -import sys from multiprocessing import Process import boto3 @@ -111,15 +110,6 @@ def _self_destruct(): ec2_client.stop_instances(InstanceIds=[instance_id]) -root = log.getLogger() -root.setLevel(log.DEBUG) -handler = log.StreamHandler(sys.stdout) -handler.setLevel(log.INFO) -formatter = log.Formatter( - "%(asctime)s %(levelname)s %(filename)s:%(lineno)d - %(message)s" -) -handler.setFormatter(formatter) -root.addHandler(handler) api = falcon.App() api.add_route("/indexing_task", IndexingJobResource()) api.add_route("/healthcheck", HealthcheckResource()) From e024346eb143c4ce6ea25d7cc4e52765bcd21f67 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 15:36:27 -0700 Subject: [PATCH 6/7] Unify ingestion server and indexer worker config --- docker-compose.yml | 2 +- ingestion_server/Dockerfile | 2 +- ingestion_server/gunicorn.conf.py | 2 -- ingestion_server/gunicorn_worker.conf.py | 45 ------------------------ 4 files changed, 2 insertions(+), 49 deletions(-) delete mode 100644 ingestion_server/gunicorn_worker.conf.py diff --git a/docker-compose.yml b/docker-compose.yml index 05db8447f8d..6bff3f6e448 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -265,7 +265,7 @@ services: args: # Automatically inferred from env vars, unless specified - INGESTION_PY_VERSION image: openverse-ingestion_server - command: gunicorn -c ./gunicorn_worker.conf.py + command: gunicorn indexer_worker:api --bind 0.0.0.0:8002 expose: - "8002" depends_on: diff --git a/ingestion_server/Dockerfile b/ingestion_server/Dockerfile index 47d6de1e5f8..f3b67063957 100644 --- a/ingestion_server/Dockerfile +++ b/ingestion_server/Dockerfile @@ -78,4 +78,4 @@ COPY --chown=ingestionu . /ingestion_server/ # - 8002: Gunicorn server for `indexer_worker` Falcon app EXPOSE 8001 8002 -CMD ["gunicorn", "-c", "./gunicorn.conf.py"] +CMD ["gunicorn", "--bind", "0.0.0.0:8001", "api:api"] diff --git a/ingestion_server/gunicorn.conf.py b/ingestion_server/gunicorn.conf.py index 3bd6daa1787..8cf280ac576 100644 --- a/ingestion_server/gunicorn.conf.py +++ b/ingestion_server/gunicorn.conf.py @@ -1,4 +1,3 @@ -bind = ["0.0.0.0:8001"] capture_output = True accesslog = "-" errorlog = "-" @@ -42,4 +41,3 @@ }, } loglevel = "debug" -wsgi_app = "api:api" diff --git a/ingestion_server/gunicorn_worker.conf.py b/ingestion_server/gunicorn_worker.conf.py deleted file mode 100644 index b5befdbab54..00000000000 --- a/ingestion_server/gunicorn_worker.conf.py +++ /dev/null @@ -1,45 +0,0 @@ -bind = ["0.0.0.0:8002"] -capture_output = True -chdir = "./ingestion_server/" -timeout = 120 -reload = True -logconfig_dict = { - # NOTE: Most of this is inherited from the default configuration - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "generic": { - "format": "[%(asctime)s - %(name)s - %(lineno)3d][%(levelname)s] %(message)s", # noqa: E501 - }, - }, - "root": {"level": "INFO", "handlers": ["console"]}, - "loggers": { - "gunicorn.error": { - "level": "DEBUG", - "handlers": ["console"], - "propagate": False, # Prevents default handler from also logging this - "qualname": "gunicorn.error", - }, - "gunicorn.access": { - "level": "INFO", - "handlers": ["console"], - "propagate": False, # Prevents default handler from also logging this - "qualname": "gunicorn.access", - }, - "": { - "level": "INFO", - "handlers": ["console"], - }, - }, - "handlers": { - "console": { - "class": "logging.StreamHandler", - "formatter": "generic", - "stream": "ext://sys.stdout", - }, - }, -} -loglevel = "debug" -wsgi_app = "indexer_worker:api" -accesslog = "-" -errorlog = "-" From 399036e699b1c0834624ba7174bd74ba1bff9f76 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Wed, 12 Jul 2023 15:48:24 -0700 Subject: [PATCH 7/7] Add reference link --- ingestion_server/gunicorn.conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ingestion_server/gunicorn.conf.py b/ingestion_server/gunicorn.conf.py index 8cf280ac576..842c29b2bcf 100644 --- a/ingestion_server/gunicorn.conf.py +++ b/ingestion_server/gunicorn.conf.py @@ -6,6 +6,7 @@ reload = True logconfig_dict = { # NOTE: Most of this is inherited from the default configuration + # https://github.com/benoitc/gunicorn/blob/cc2e3835784542e65886cd27f64d444309fbaad0/gunicorn/glogging.py#L48-L86 "version": 1, "disable_existing_loggers": False, "formatters": {