diff --git a/changelog.md b/changelog.md index ab6181f..bfa5dbd 100644 --- a/changelog.md +++ b/changelog.md @@ -6,6 +6,8 @@ Brief summary of what's in this release: - Added logic to change ownership of moved files off of root user - Removed logic that required connection to spectrack for dlu-watcher - updated logic for handling nested folders +- updated base image of DluWatcher container +- set install of requirements as '--progress-bar off' to address issue with installing Flask and using all threads in DluWatcher container ### Breaking changes diff --git a/data_management/DluWatcher b/data_management/DluWatcher index 4dfd2dc..fa8cada 100644 --- a/data_management/DluWatcher +++ b/data_management/DluWatcher @@ -1,8 +1,10 @@ -FROM python:3.10-slim +FROM python:3.10-slim-bullseye COPY requirements.txt ./ -RUN pip3 install --no-cache-dir -r requirements.txt +RUN pip3 install --progress-bar off --no-cache-dir -r requirements.txt +RUN pip3 install -U flask-cors + COPY ./lib/ ./lib COPY ./services/dlu_filesystem.py ./services/dlu_filesystem.py COPY ./services/dlu_package_inventory.py ./services/dlu_package_inventory.py @@ -12,4 +14,4 @@ COPY ./services/dlu_mongo.py ./services/dlu_mongo.py COPY ./model ./model COPY ./watch_files.py ./ -ENTRYPOINT ["python3", "watch_files.py"] \ No newline at end of file +ENTRYPOINT ["python3", "watch_files.py"] diff --git a/data_management/README.md b/data_management/README.md index 48b7181..182a1c1 100644 --- a/data_management/README.md +++ b/data_management/README.md @@ -7,31 +7,16 @@ 2. Build the Docker image: - `$ sh rebuild.sh` -3a. Run the importer service with the arguments in double quotes: - - `$ sh run.sh "-d spectrack -a update"` +3a. Run the Spectrack import service: + - `$ python3.9 main.py -d spectrack -a update` *NOTE*: If you are running this on an empty/new database, you will need to run it in insert mode first: - - `$ sh run.sh "-d spectrack -a insert"` + - `$ python3.9 main.py -d spectrack -a insert` OR 3b. 
Run the importer service from a local machine (eg: When running on a Mac and docker isn't available): - - `$ python3.8 main.py --data_source redcap --action insert` - -OR - -3c. Run the service as a Flask app within the dataLake network - - `$ sh run-service.sh` - -OR - -3d. Run the service as a Flask app on the host machine - - `$ sh run-service-host.sh` - -OR - -3e. Run the bulk upload with the arguments in double quotes: -- `$ sh run.sh "-d spectrack -m"` + - `$ python3.9 main.py --data_source redcap --action insert` NOTE: The (non-Flask app) service options for the Data Importer are: - [-h] -a {update,insert} -d {redcap,spectrack} @@ -116,15 +101,5 @@ This endpoint retrieves the packages that are ready to be moved from Globus. The ### rebuild library modules on change `$ python3 setup.py install --user` -## Running this on the DLU / DMD Server -The heavens-docker/libra directory has a docker-compose file for running this on the DLU / DMD server. - -## Running the Bulk Upload inside Docker -If you're running into Python library errors while running the Bulk Upload Script locally, try running it in Docker with the options added to the end: -``` -docker run -v [file source directory]:/source -v [dataLake directory]:/dataLake -v .:/usr/src/app --network=dataLake --env INSIDE_DOCKER=true --rm -ti kingstonduo/data-management:latest python3 process_bulk_uploads.py -d /source -p -``` -NOTE: The .env file variable "dlu_data_directory" is used by this script as the destination (e.g. Data Lake) directory. In the example above you would set it to "/dataLake". - ## Known Bug There is a known [bug with docker on MacOS](https://github.com/docker/for-mac/issues/2670) in which the container is unable to talk to the host network. This problem may occur when attempting to connect to a tunnel created on the host machine. 
To work around this issue, you can either run this on a linux machine/windows machine, or bypass docker completely and run the script directly on your local machine. diff --git a/data_management/lib/mongo_connection.py b/data_management/lib/mongo_connection.py index d155c68..3a0b293 100644 --- a/data_management/lib/mongo_connection.py +++ b/data_management/lib/mongo_connection.py @@ -42,10 +42,11 @@ def get_mongo_connection(self): ) database = mongo_client[self.database] return database - except: + except Exception: logger.error( f"Can't connect to Mongo\nMake sure you have filled out the correct environment variables in the .env file" ) + logger.error(traceback.format_exc()) logger.error(self.host) os.sys.exit() diff --git a/data_management/rebuild.sh b/data_management/rebuild.sh index 69b787c..b230e4e 100644 --- a/data_management/rebuild.sh +++ b/data_management/rebuild.sh @@ -1,2 +1,2 @@ python3 setup.py install --user -docker build -t kingstonduo/data-management:1.7 . +docker build -t kingstonduo/data-management:1.8 . diff --git a/data_management/requirements.txt b/data_management/requirements.txt index 075f075..a468132 100644 --- a/data_management/requirements.txt +++ b/data_management/requirements.txt @@ -8,4 +8,4 @@ python_dotenv python-dateutil zarr-checksum pyyaml -gunicorn \ No newline at end of file +gunicorn