Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dockerize #22

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
*#
*~
.idea/
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
15 changes: 15 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image used to run the OpenEDGAR Django project. It contains the Python
# runtime and the project's pinned dependencies; no application source is
# copied into the image here.
FROM python:3.6

# PostgreSQL client tooling and libpq development files. The apt package
# index is removed in the same layer so it is not stored in the image.
RUN apt-get update \
    && apt-get install -y postgresql-client-common libpq-dev \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip

# Directory of the Django project inside the container; also the working
# directory for every subsequent instruction and for the container command.
ENV OPENEDGAR_DIR=/opt/openedgar/lexpredict_openedgar
RUN mkdir -p ${OPENEDGAR_DIR}
WORKDIR ${OPENEDGAR_DIR}

# might be necessary for pandas=0.22
# sudo apt-get install libblas3 liblapack3 liblapack-dev libblas-dev gfortran libatlas-base-dev

# Only the requirements file is copied (relative to WORKDIR) before installing.
COPY lexpredict_openedgar/requirements/full.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
104 changes: 104 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Compose stack for OpenEDGAR: the Django application (shell / web / worker)
# plus the services it connects to (PostgreSQL, RabbitMQ, Tika, MinIO as S3).
#
# NOTE(review): all credentials below are hard-coded placeholder values;
# confirm this file is only used for local development.
version: "3.8"

services:
  # Template service: image build, shared environment and source mount.
  # shell, web and worker reuse it through `extends`.
  # NOTE(review): `extends` is part of the Compose Specification but was not
  # accepted by docker-compose v1 for 3.x files - confirm the Compose version
  # this is meant to run on.
  openedgar-base:
    build: .
    environment:
      - DATABASE_URL=postgres://openedgar:openedgar@postgres:5432/openedgar
      - CELERY_BROKER_URL=amqp://openedgar:openedgar@rabbitmq:5672/openedgar
      - CELERY_RESULT_BACKEND=rpc
      - CELERY_RESULT_PERSISTENT=False
      - DJANGO_SECRET_KEY=DJANGO_SECRET_KEY
      - DJANGO_SETTINGS_MODULE=config.settings.production
      - DJANGO_AWS_ACCESS_KEY_ID=minio_access_key
      - DJANGO_AWS_SECRET_ACCESS_KEY=minio_secret_key
      - AWS_ACCESS_KEY_ID=minio_access_key
      - AWS_SECRET_ACCESS_KEY=minio_secret_key
      - DJANGO_AWS_STORAGE_BUCKET_NAME=django
      - S3_BUCKET=openedgar
      # Points the S3 client at the `s3` (MinIO) service defined below.
      - AWS_S3_ENDPOINT_URL=http://s3:9000
      - CLIENT_TYPE=S3
      - DJANGO_MAILGUN_API_KEY=
      - MAILGUN_SENDER_DOMAIN=
      - DJANGO_ADMIN_URL=
    volumes:
      # The repository root is mounted over /opt/openedgar.
      - .:/opt/openedgar

  # Interactive Django shell.
  shell:
    extends: openedgar-base
    links:
      - postgres
      - tika
      - rabbitmq
      - s3
    command: python manage.py shell

  # Django development server, published on host port 8000.
  web:
    extends: openedgar-base
    links:
      - postgres
      - tika
      - rabbitmq
      - s3
    ports:
      - "8000:8000"
    command: python manage.py runserver 0:8000

  # Celery worker with 16 concurrent processes (-c16).
  worker:
    extends: openedgar-base
    environment:
      - C_FORCE_ROOT=1
    links:
      - postgres
      - tika
      - rabbitmq
      - s3
    command: celery -A lexpredict_openedgar.taskapp worker --loglevel=INFO -c16

  tika:
    image: apache/tika
    ports:
      - "9998:9998"

  postgres:
    image: postgres
    ports:
      - "5432:5432"
    environment:
      - POSTGRES_USER=openedgar
      - POSTGRES_PASSWORD=openedgar
      - POSTGRES_DB=openedgar
    volumes:
      - postgres-data:/var/lib/postgresql/data/

  # Broker for Celery; user, password and vhost match CELERY_BROKER_URL above.
  rabbitmq:
    image: rabbitmq:3-management-alpine
    volumes:
      - rabbitmq-etc:/etc/rabbitmq/
      - rabbitmq-data:/var/lib/rabbitmq/
      - rabbitmq-logs:/var/log/rabbitmq/
    environment:
      RABBITMQ_DEFAULT_USER: openedgar
      RABBITMQ_DEFAULT_PASS: openedgar
      RABBITMQ_DEFAULT_VHOST: openedgar
    ports:
      - "5672:5672"
      - "15672:15672"

  # MinIO server standing in for S3; keys match the AWS_* variables above.
  s3:
    image: minio/minio
    ports:
      - "9000:9000"
    volumes:
      - minio-data:/data
    environment:
      MINIO_ACCESS_KEY: minio_access_key
      MINIO_SECRET_KEY: minio_secret_key
    command: server /data

# Named volumes referenced by the services above.
volumes:
  minio-data:
  postgres-data:
  rabbitmq-data:
  rabbitmq-logs:
  rabbitmq-etc:
13 changes: 7 additions & 6 deletions lexpredict_openedgar/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@
For the full list of settings and their values, see
https://docs.djangoproject.com/en/dev/ref/settings/
"""
import pathlib

import environ

import pathlib
import environ
ROOT_DIR = environ.Path(
__file__) - 3 # (lexpredict_openedgar/config/settings/base.py - 3 = lexpredict_openedgar/)
APPS_DIR = ROOT_DIR.path('lexpredict_openedgar')
Expand Down Expand Up @@ -294,7 +294,7 @@
# SLUGLIFIER
AUTOSLUG_SLUGIFY_FUNCTION = 'slugify.slugify'

########## CELERY
# CELERY
INSTALLED_APPS += ['lexpredict_openedgar.taskapp.celery.CeleryConfig']
CELERY_BROKER_URL = env('CELERY_BROKER_URL', default='django://')
CELERY_RESULT_BACKEND = 'rpc'
Expand All @@ -304,7 +304,7 @@
#: from unwanted access (see userguide/security.html)
CELERY_ACCEPT_CONTENT = ['pickle']
CELERY_TASK_SERIALIZER = 'pickle'
########## END CELERY
# END CELERY


# Location of root django.contrib.admin URL, use {% url 'admin:index' %}
Expand All @@ -326,12 +326,13 @@
HTTP_SLEEP_DEFAULT = 0.0

# S3 bucket configuration
S3_ACCESS_KEY = env('S3_ACCESS_KEY', default="")
S3_SECRET_KEY = env('S3_SECRET_KEY', default="")
S3_ACCESS_KEY = env('AWS_ACCESS_KEY_ID', default=env('S3_ACCESS_KEY', default=""))
S3_SECRET_KEY = env('AWS_SECRET_ACCESS_KEY', default=env('S3_SECRET_KEY', default=""))
S3_BUCKET = env('S3_BUCKET', default="")
S3_DOCUMENT_PATH = env('S3_DOCUMENT_PATH', default="openedgar")
S3_PREFIX = env('S3_PREFIX', default="documents")
S3_COMPRESSION_LEVEL = int(env('S3_COMPRESSION_LEVEL', default=6))
# Optional custom S3 endpoint (e.g. a MinIO server). When the variable is unset
# or empty the value is normalised to None, because this setting is passed as
# boto3's endpoint_url and an empty string is not a valid endpoint there,
# whereas None selects the default AWS endpoint.
S3_ENDPOINT = env('AWS_S3_ENDPOINT_URL', default="") or None

# Tika configuration
TIKA_HOST = "localhost"
Expand Down
5 changes: 3 additions & 2 deletions lexpredict_openedgar/config/settings/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- Use Redis for cache
"""

import dj_database_url
from .base import * # noqa

# SECRET CONFIGURATION
Expand Down Expand Up @@ -133,8 +134,8 @@

# Use the Heroku-style specification
# Raises ImproperlyConfigured exception if DATABASE_URL not in os.environ
DATABASES['default'] = env.db('DATABASE_URL')
DATABASES['default']['CONN_MAX_AGE'] = env.int('CONN_MAX_AGE', default=60)
# DATABASES['default'] = env.db('DATABASE_URL')
DATABASES['default'] = dj_database_url.config(conn_max_age=600)
DATABASES['default']['ATOMIC_REQUESTS'] = True

# CACHING
Expand Down
11 changes: 11 additions & 0 deletions lexpredict_openedgar/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,15 @@
current_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(current_path, 'lexpredict_openedgar'))

from django.db import connection
# Run the migrations automatically when the 'openedgar_company' table is
# missing, which is used here as the marker for an uninitialised database.
if 'openedgar_company' not in connection.introspection.table_names():
    execute_from_command_line(['manage.py', 'migrate'])

from openedgar.clients.s3 import S3Client
s3 = S3Client()
# Best-effort creation of the configured bucket: a failure must not prevent
# the requested management command from running. Only Exception is caught so
# KeyboardInterrupt and SystemExit still propagate, and the reason is reported
# on stderr instead of being silently discarded.
try:
    s3.get_bucket().create()
except Exception as exc:
    print('S3 bucket creation skipped: {0}'.format(exc), file=sys.stderr)

execute_from_command_line(sys.argv)
11 changes: 7 additions & 4 deletions lexpredict_openedgar/openedgar/clients/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

from typing import Union

from config.settings.base import S3_ACCESS_KEY, S3_BUCKET, S3_COMPRESSION_LEVEL, S3_SECRET_KEY
from config.settings.base import S3_ACCESS_KEY, S3_BUCKET, S3_COMPRESSION_LEVEL, S3_SECRET_KEY, S3_ENDPOINT

# Setup logger
logger = logging.getLogger(__name__)
Expand All @@ -57,7 +57,8 @@ def get_resource(self):
:return: returns boto3 S3 resource object
"""
# Create S3 resource
s3 = boto3.resource('s3', aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY)
s3 = boto3.resource('s3', endpoint_url=S3_ENDPOINT, aws_access_key_id=S3_ACCESS_KEY,
aws_secret_access_key=S3_SECRET_KEY)
return s3

def get_client(self):
Expand All @@ -66,7 +67,8 @@ def get_client(self):
:return: returns boto3 S3 client object
"""
# Create S3 client
client = boto3.client('s3', aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY)
client = boto3.client('s3', aws_access_key_id=S3_ACCESS_KEY,
aws_secret_access_key=S3_SECRET_KEY, endpoint_url=S3_ENDPOINT)
return client

def get_bucket(self):
Expand All @@ -75,7 +77,8 @@ def get_bucket(self):
:return: returns boto3 S3 bucket resource
"""
# Get bucket
s3 = boto3.resource('s3', aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY)
s3 = boto3.resource('s3', aws_access_key_id=S3_ACCESS_KEY,
aws_secret_access_key=S3_SECRET_KEY, endpoint_url=S3_ENDPOINT)
bucket = s3.Bucket(S3_BUCKET)
return bucket

Expand Down
16 changes: 10 additions & 6 deletions lexpredict_openedgar/requirements/full.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ cffi==1.11.5
chardet==3.0.4
coverage==4.5.1
coveralls==1.3.0
datefinder==0.6.1
datefinder==0.7.1
decorator==4.3.0
defusedxml==0.5.0
Django==2.0.8
Expand Down Expand Up @@ -58,7 +58,7 @@ jupyter-core==4.4.0
kombu==3.0.37
lazy-object-proxy==1.3.1
lxml==4.1.1
MarkupSafe==1.0
MarkupSafe==1.1.1
mccabe==0.6.1
mistune==0.8.3
more-itertools==4.1.0
Expand All @@ -71,7 +71,7 @@ num2words==0.5.5
numpy==1.14.3
oauthlib==2.0.7
packaging==17.1
pandas==0.22.0
pandas==0.21.0
pandocfilters==1.4.2
parso==0.2.0
pexpect==4.5.0
Expand Down Expand Up @@ -107,7 +107,7 @@ qtconsole==4.3.1
redis==2.10.6
regex==2017.9.23
reporters-db==1.0.12.1
requests==2.20.0
requests==2.18.4
requests-oauthlib==0.8.0
s3transfer==0.1.13
scikit-learn==0.19.1
Expand All @@ -128,11 +128,15 @@ tornado==5.0.2
traitlets==4.3.2
typing==3.6.2
Unidecode==0.4.21
urllib3==1.23
urllib3==1.22
wcwidth==0.1.7
webencodings==0.5.1
Werkzeug==0.14.1
whitenoise==3.3.1
widgetsnbextension==3.2.1
wrapt==1.10.11
https://github.com/LexPredict/lexpredict-lexnlp/archive/0.1.8.zip
gunicorn==20.0.4
django-storages==1.9.1
django-anymail==8.1
dj-database-url==0.5.0
https://github.com/LexPredict/lexpredict-lexnlp/archive/0.1.9.zip