From fb957a2509079d1c42f033ccb0e12eef932a97fd Mon Sep 17 00:00:00 2001 From: Clifton King Date: Sat, 23 Jan 2021 22:09:33 -0600 Subject: [PATCH 1/3] minimum viable docker build --- Dockerfile | 13 +++++++++++++ lexpredict_openedgar/requirements/full.txt | 12 ++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8deccd8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.6 + +RUN pip install --upgrade pip + +ENV OPENEDGAR_DIR /opt/openedgar +RUN mkdir -p ${OPENEDGAR_DIR} +WORKDIR ${OPENEDGAR_DIR} + +# might be necessary for pandas=0.22 +# sudo apt-get install libblas3 liblapack3 liblapack-dev libblas-dev gfortran libatlas-base-dev + +COPY lexpredict_openedgar/requirements/full.txt requirements.txt +RUN pip install -r requirements.txt \ No newline at end of file diff --git a/lexpredict_openedgar/requirements/full.txt b/lexpredict_openedgar/requirements/full.txt index f34945d..3b919d3 100755 --- a/lexpredict_openedgar/requirements/full.txt +++ b/lexpredict_openedgar/requirements/full.txt @@ -18,7 +18,7 @@ cffi==1.11.5 chardet==3.0.4 coverage==4.5.1 coveralls==1.3.0 -datefinder==0.6.1 +datefinder==0.7.1 decorator==4.3.0 defusedxml==0.5.0 Django==2.0.8 @@ -58,7 +58,7 @@ jupyter-core==4.4.0 kombu==3.0.37 lazy-object-proxy==1.3.1 lxml==4.1.1 -MarkupSafe==1.0 +MarkupSafe==1.1.1 mccabe==0.6.1 mistune==0.8.3 more-itertools==4.1.0 @@ -71,7 +71,7 @@ num2words==0.5.5 numpy==1.14.3 oauthlib==2.0.7 packaging==17.1 -pandas==0.22.0 +pandas==0.21.0 pandocfilters==1.4.2 parso==0.2.0 pexpect==4.5.0 @@ -107,7 +107,7 @@ qtconsole==4.3.1 redis==2.10.6 regex==2017.9.23 reporters-db==1.0.12.1 -requests==2.20.0 +requests==2.18.4 requests-oauthlib==0.8.0 s3transfer==0.1.13 scikit-learn==0.19.1 @@ -128,11 +128,11 @@ tornado==5.0.2 traitlets==4.3.2 typing==3.6.2 Unidecode==0.4.21 -urllib3==1.23 +urllib3==1.22 wcwidth==0.1.7 webencodings==0.5.1 Werkzeug==0.14.1 whitenoise==3.3.1 widgetsnbextension==3.2.1 wrapt==1.10.11 -https://github.com/LexPredict/lexpredict-lexnlp/archive/0.1.8.zip \ No newline at end of file +https://github.com/LexPredict/lexpredict-lexnlp/archive/0.1.9.zip \ No newline at end of file From 19f298a428f1379b6cd1af546915d58543dc0b06 Mon Sep 17 00:00:00 2001 From: Clifton King Date: Sun, 24 Jan 2021 00:49:51 -0600 Subject: [PATCH 2/3] openedgar worker booting / shell commands work --- .gitignore | 1 + Dockerfile | 4 +- docker-compose.yml | 105 ++++++++++++++++++ lexpredict_openedgar/config/settings/base.py | 13 ++- .../config/settings/production.py | 5 +- lexpredict_openedgar/manage.py | 11 ++ lexpredict_openedgar/openedgar/clients/s3.py | 11 +- lexpredict_openedgar/requirements/full.txt | 4 + 8 files changed, 141 insertions(+), 13 deletions(-) create mode 100644 docker-compose.yml diff --git a/.gitignore b/.gitignore index e272905..05553ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *# *~ .idea/ +.vscode/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Dockerfile b/Dockerfile index 8deccd8..673c3f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,10 @@ FROM python:3.6 +RUN apt-get update && apt-get install -y postgresql-client-common libpq-dev + RUN pip install --upgrade pip -ENV OPENEDGAR_DIR /opt/openedgar +ENV OPENEDGAR_DIR /opt/openedgar/lexpredict_openedgar RUN mkdir -p ${OPENEDGAR_DIR} WORKDIR ${OPENEDGAR_DIR} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..e70aebf --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,105 @@ +version: "3.8" + +services: + openedgar-base: + build: . + environment: + - DATABASE_URL=postgres://openedgar:openedgar@postgres:5432/openedgar + - CELERY_BROKER_URL=amqp://openedgar:openedgar@rabbitmq:5672/openedgar + - CELERY_RESULT_BACKEND=rpc + - CELERY_RESULT_PERSISTENT=False + - DJANGO_SECRET_KEY=DJANGO_SECRET_KEY + - DJANGO_SETTINGS_MODULE=config.settings.production + - DJANGO_AWS_ACCESS_KEY_ID=minio_access_key + - DJANGO_AWS_SECRET_ACCESS_KEY=minio_secret_key + - AWS_ACCESS_KEY_ID=minio_access_key + - AWS_SECRET_ACCESS_KEY=minio_secret_key + - DJANGO_AWS_STORAGE_BUCKET_NAME=django + - S3_BUCKET=openedgar + - AWS_S3_ENDPOINT_URL=http://s3:9000 + - CLIENT_TYPE=S3 + - DJANGO_MAILGUN_API_KEY= + - MAILGUN_SENDER_DOMAIN= + - DJANGO_ADMIN_URL= + volumes: + - .:/opt/openedgar + + shell: + extends: openedgar-base + links: + - postgres + - tika + - rabbitmq + - s3 + command: python manage.py shell + + web: + extends: openedgar-base + links: + - postgres + - tika + - rabbitmq + - s3 + ports: + - 8000:8000 + command: python manage.py runserver 0:8000 + + worker: + extends: openedgar-base + environment: + - C_FORCE_ROOT=1 + links: + - postgres + - tika + - rabbitmq + - s3 + command: celery -A lexpredict_openedgar.taskapp worker --loglevel=INFO -c16 + + tika: + image: apache/tika + ports: + - 9998:9998 + + postgres: + image: postgres + ports: + - 5432:5432 + environment: + - POSTGRES_USER=openedgar + - POSTGRES_PASSWORD=openedgar + - POSTGRES_DB=openedgar + volumes: + - postgres-data:/var/lib/postgresql/data/ + + rabbitmq: + image: rabbitmq:3-management-alpine + container_name: rabbitmq + volumes: + - rabbitmq-etc:/etc/rabbitmq/ + - rabbitmq-data:/var/lib/rabbitmq/ + - rabbitmq-logs:/var/log/rabbitmq/ + environment: + RABBITMQ_DEFAULT_USER: openedgar + RABBITMQ_DEFAULT_PASS: openedgar + RABBITMQ_DEFAULT_VHOST: openedgar + ports: + - 5672:5672 + - 15672:15672 + + s3: + image: minio/minio + ports: + - 9000:9000 + volumes: + - minio-data:/data + environment: + MINIO_ACCESS_KEY: minio_access_key + MINIO_SECRET_KEY: minio_secret_key + command: server /data + +volumes: + minio-data: + postgres-data: + rabbitmq-data: + rabbitmq-logs: + rabbitmq-etc: diff --git a/lexpredict_openedgar/config/settings/base.py b/lexpredict_openedgar/config/settings/base.py index 02c2ad3..db0759c 100755 --- a/lexpredict_openedgar/config/settings/base.py +++ b/lexpredict_openedgar/config/settings/base.py @@ -31,10 +31,10 @@ For the full list of settings and their values, see https://docs.djangoproject.com/en/dev/ref/settings/ """ -import pathlib -import environ +import pathlib +import environ ROOT_DIR = environ.Path( __file__) - 3 # (lexpredict_openedgar/config/settings/base.py - 3 = lexpredict_openedgar/) APPS_DIR = ROOT_DIR.path('lexpredict_openedgar') @@ -294,7 +294,7 @@ # SLUGLIFIER AUTOSLUG_SLUGIFY_FUNCTION = 'slugify.slugify' -########## CELERY +# CELERY INSTALLED_APPS += ['lexpredict_openedgar.taskapp.celery.CeleryConfig'] CELERY_BROKER_URL = env('CELERY_BROKER_URL', default='django://') CELERY_RESULT_BACKEND = 'rpc' @@ -304,7 +304,7 @@ #: from unwanted access (see userguide/security.html) CELERY_ACCEPT_CONTENT = ['pickle'] CELERY_TASK_SERIALIZER = 'pickle' -########## END CELERY +# END CELERY # Location of root django.contrib.admin URL, use {% url 'admin:index' %} @@ -326,12 +326,13 @@ HTTP_SLEEP_DEFAULT = 0.0 # S3 bucket configuration -S3_ACCESS_KEY = env('S3_ACCESS_KEY', default="") -S3_SECRET_KEY = env('S3_SECRET_KEY', default="") +S3_ACCESS_KEY = env('AWS_ACCESS_KEY_ID', default=env('S3_ACCESS_KEY', default="")) +S3_SECRET_KEY = env('AWS_SECRET_ACCESS_KEY', default=env('S3_SECRET_KEY', default="")) S3_BUCKET = env('S3_BUCKET', default="") S3_DOCUMENT_PATH = env('S3_DOCUMENT_PATH', default="openedgar") S3_PREFIX = env('S3_PREFIX', default="documents") S3_COMPRESSION_LEVEL = int(env('S3_COMPRESSION_LEVEL', default=6)) +S3_ENDPOINT = env('AWS_S3_ENDPOINT_URL', default="") # Tika configuration TIKA_HOST = "localhost" diff --git a/lexpredict_openedgar/config/settings/production.py b/lexpredict_openedgar/config/settings/production.py index 371f34f..5456046 100755 --- a/lexpredict_openedgar/config/settings/production.py +++ b/lexpredict_openedgar/config/settings/production.py @@ -31,6 +31,7 @@ - Use Redis for cache """ +import dj_database_url from .base import * # noqa # SECRET CONFIGURATION @@ -133,8 +134,8 @@ # Use the Heroku-style specification # Raises ImproperlyConfigured exception if DATABASE_URL not in os.environ -DATABASES['default'] = env.db('DATABASE_URL') -DATABASES['default']['CONN_MAX_AGE'] = env.int('CONN_MAX_AGE', default=60) +# DATABASES['default'] = env.db('DATABASE_URL') +DATABASES['default'] = dj_database_url.config(conn_max_age=600) DATABASES['default']['ATOMIC_REQUESTS'] = True # CACHING diff --git a/lexpredict_openedgar/manage.py b/lexpredict_openedgar/manage.py index dc591af..e965eda 100755 --- a/lexpredict_openedgar/manage.py +++ b/lexpredict_openedgar/manage.py @@ -26,4 +26,15 @@ current_path = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.join(current_path, 'lexpredict_openedgar')) + from django.db import connection + if 'openedgar_company' not in connection.introspection.table_names(): + execute_from_command_line(['manage.py', 'migrate']) + + from openedgar.clients.s3 import S3Client + s3 = S3Client() + try: + s3.get_bucket().create() + except: + pass + execute_from_command_line(sys.argv) diff --git a/lexpredict_openedgar/openedgar/clients/s3.py b/lexpredict_openedgar/openedgar/clients/s3.py index 367f3aa..420e05b 100755 --- a/lexpredict_openedgar/openedgar/clients/s3.py +++ b/lexpredict_openedgar/openedgar/clients/s3.py @@ -34,7 +34,7 @@ from typing import Union -from config.settings.base import S3_ACCESS_KEY, S3_BUCKET, S3_COMPRESSION_LEVEL, S3_SECRET_KEY +from config.settings.base import S3_ACCESS_KEY, S3_BUCKET, S3_COMPRESSION_LEVEL, S3_SECRET_KEY, S3_ENDPOINT # Setup logger logger = logging.getLogger(__name__) @@ -57,7 +57,8 @@ def get_resource(self): :return: returns boto3 S3 resource object """ # Create S3 resource - s3 = boto3.resource('s3', aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY) + s3 = boto3.resource('s3', endpoint_url=S3_ENDPOINT, aws_access_key_id=S3_ACCESS_KEY, + aws_secret_access_key=S3_SECRET_KEY) return s3 def get_client(self): @@ -66,7 +67,8 @@ def get_client(self): :return: returns boto3 S3 client object """ # Create S3 client - client = boto3.client('s3', aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY) + client = boto3.client('s3', aws_access_key_id=S3_ACCESS_KEY, + aws_secret_access_key=S3_SECRET_KEY, endpoint_url=S3_ENDPOINT) return client def get_bucket(self): @@ -75,7 +77,8 @@ def get_bucket(self): :return: returns boto3 S3 bucket resource """ # Get bucket - s3 = boto3.resource('s3', aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY) + s3 = boto3.resource('s3', aws_access_key_id=S3_ACCESS_KEY, + aws_secret_access_key=S3_SECRET_KEY, endpoint_url=S3_ENDPOINT) bucket = s3.Bucket(S3_BUCKET) return bucket diff --git a/lexpredict_openedgar/requirements/full.txt b/lexpredict_openedgar/requirements/full.txt index 3b919d3..6185d0d 100755 --- a/lexpredict_openedgar/requirements/full.txt +++ b/lexpredict_openedgar/requirements/full.txt @@ -135,4 +135,8 @@ Werkzeug==0.14.1 whitenoise==3.3.1 widgetsnbextension==3.2.1 wrapt==1.10.11 +gunicorn==20.0.4 +django-storages==1.9.1 +django-anymail==8.1 +dj-database-url==0.5.0 https://github.com/LexPredict/lexpredict-lexnlp/archive/0.1.9.zip \ No newline at end of file From c69a3f190bde97f762c85a72ca334078114ea7cd Mon Sep 17 00:00:00 2001 From: Clifton King Date: Sun, 24 Jan 2021 00:51:54 -0600 Subject: [PATCH 3/3] remove hardcoded container name on rabbitmq --- docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index e70aebf..42c1b44 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,7 +73,6 @@ services: rabbitmq: image: rabbitmq:3-management-alpine - container_name: rabbitmq volumes: - rabbitmq-etc:/etc/rabbitmq/ - rabbitmq-data:/var/lib/rabbitmq/