diff --git a/.github/workflows/data-transfer-s3-to-github.yml b/.github/workflows/data-transfer-s3-to-github.yml new file mode 100644 index 0000000..4d0ece2 --- /dev/null +++ b/.github/workflows/data-transfer-s3-to-github.yml @@ -0,0 +1,33 @@ +# This transfers files from S3 to Github. +# TODO: incorporate into the script in this repo +# blocked by https://github.com/github/rest-api-description/issues/1147 +name: Ebola data S3 -> Github + +on: + schedule: + - cron: '10 19 * * *' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Download latest.csv file from S3 + uses: keithweaver/aws-s3-github-action@v1.0.0 + with: + command: cp + source: s3://${{ secrets.S3_BUCKET }}/latest.csv + destination: ./bucket/latest.csv + aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws_region: eu-central-1 + - name: Upload files to GitHub + run: | + git pull + sudo mv bucket/latest.csv . + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add latest.csv + git commit -m "Updating latest.csv" || echo "Nothing changed since last run." 
+ git push diff --git a/.github/workflows/ingestion_deploy.yml b/.github/workflows/ingestion_deploy.yml new file mode 100644 index 0000000..e175a83 --- /dev/null +++ b/.github/workflows/ingestion_deploy.yml @@ -0,0 +1,36 @@ +name: GSheets to S3/DB script deploy + +on: + push: + branches: [main] + paths: + - 'scripts/ingestion/*.py' + - 'scripts/ingestion/pyproject.toml' + - 'scripts/ingestion/poetry.lock' + - 'scripts/Dockerfile' + workflow_dispatch: + +jobs: + deploy: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: eu-central-1 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Build, tag, and push image to Amazon ECR (latest) + env: + REGISTRY: ${{ steps.login-ecr.outputs.registry }} + REPO: ebola + IMAGE_TAG: ${{ github.sha }} + run: | + cd scripts/gh_data_update && docker build -f ./../Dockerfile -t $REGISTRY/$REPO:latest . 
+ docker push $REGISTRY/$REPO:latest diff --git a/.github/workflows/ingestion_tests.yml b/.github/workflows/ingestion_tests.yml new file mode 100644 index 0000000..0f2f120 --- /dev/null +++ b/.github/workflows/ingestion_tests.yml @@ -0,0 +1,27 @@ +name: Test GSheets to S3/DB script + +on: + push: + branches: [main] + paths: + - '.github/workflows/gh_data_update_tests.yml' + - 'scripts/ingestion/*.py' + - 'scripts/ingestion/pyproject.toml' + - 'scripts/ingestion/poetry.lock' + - 'scripts/Dockerfile-test' + pull_request: + paths: + - '.github/workflows/gh_data_update_tests.yml' + - 'scripts/ingestion/*.py' + - 'scripts/ingestion/pyproject.toml' + - 'scripts/ingestion/poetry.lock' + - 'scripts/Dockerfile-test' + + +jobs: + tests: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - name: Run tests + run: cd scripts/ingestion && ./test_stack.sh \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b6e4761 --- /dev/null +++ b/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a1862dd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Global.health + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject 
to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..17bb5c1 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# Ebola data Uganda outbreak 2022 + +This repository contains dated records of curated Ebola cases from the 2022 outbreak in Uganda. Data are curated from openly accessible sources. We continue to experience ongoing challenges in data curation, discussed below. Line-list data may change due to ongoing data reconciliation and validation. + +Our latest data set and archives can be found [here](https://3mmuwilir3.execute-api.eu-central-1.amazonaws.com/web). + +UPDATE: + +2023-01-20. On January 11th, 2023, the Ugandan Ministry of Health declared the end of the SVD Ebola outbreak. The final number of confirmed cases is 142. G.h counts for confirmed cases by district matches that reported in the MOH SitReps through [#90](https://www.afro.who.int/sites/default/files/2023-01/Ug_EVD_SitRep%2390.pdf) (Mubende 64; Kyegegwa 4; Kassanda 49; Kagadi 1; Masaka 1; Wakiso 4; Jinja 1; Kampala 17; Bunyangabu 1); however, SitRep [#91](https://www.afro.who.int/sites/default/files/2023-01/Ug_EVD_SitRep%2391.pdf) reassigned a KAL case to WAK without explanation. Therefore, final counts of confirmed cases by district do not match. 
+ +Also, Outcome for ID#s 149, 150, 157, 158, 161, and 162 remain unassigned for reasons outlined in our [blog post](https://globaldothealth.substack.com/p/curator-review-for-2022-reflecting). + +Finally, Uganda's [MOH website](https://www.health.go.ug/ebola/) differs from final SitRep data in [#93](https://www.afro.who.int/sites/default/files/2023-01/Ug_EVD_SitRep%2393.pdf). The MOH website reports 142 cases/ 56 deaths/ 86 recoveries. SitRep data reports 142 cases/ 55 deaths/ 87 recoveries. + +We recognize these discrepancies and remain limited by the detail and accuracy of information publicly released by the MOH. Questions can be addressed to info@global.health. + +2022-12-14. The sum of deaths and recoveries for the G.h dataset is not in alignment with current MOH numbers. Our curation team identified count and location discrepancies in data from SitRep [66](https://www.afro.who.int/countries/uganda/publication/ebola-virus-disease-uganda-sitrep-66) and beyond that prevent us from updating the Outcome for the remaining cases. Also, SitRep [68](https://www.afro.who.int/countries/uganda/publication/ebola-virus-disease-uganda-sitrep-68) reported a reclassification of Outcome for a Mubende case; however, without further detail, we are unable to identify a specific ID# for the change. We are limited by the detail and accuracy of information publicly released by the MOH. We will continue to check for new MOH reports that may provide updated/corrected case information that could reconcile differences between our two datasets. + +2022-11-23: The curation team has completed a data reconciliation exercise to update case data between SitReps (e.g. deaths, recoveries, HCW, and location information). The "Date_last_modified" column lists the date of reconciliation on 2022-11-22. 
Data are updated through SitRep [57](https://www.afro.who.int/countries/uganda/publication/ebola-virus-disease-uganda-sitrep-57) with a total of 141 confirmed cases, including 55 deaths, and 79 recoveries. + +2022-11-04: Confirmed cases have been added to the line-list through SitRep [41](https://www.afro.who.int/sites/default/files/2022-11/Ug_EVD_SitRep%2341.pdf) @131 cases. Our curation team is working to reconcile case data between SitReps (e.g. deaths, recoveries, HCW), which will take time to complete due to changing report format and fluctuating counts that greatly complicate the review process. + +## Data curation +This section is an overview of the data curation process, a discussion about limitations and assumptions. + +Curation, especially early in the outbreak, is a manual, labor-intensive process. We experience many recurring challenges in building an emerging disease dataset in real-time. + +The Ebola line-list is built from a collection of sources, listed here, which will be updated as new sources become available: https://github.com/globaldothealth/ebola/wiki. The original source(s) of information is provided for each line-list ID in our database. The WHO provides Situation Reports (SitReps) from the Uganda Ministry of Health; while not available from the start of the outbreak, these resources have become a primary source for information. However, we remain limited by inconsistent, aggregated, or missing case information; change in reporting format; data reconciliation; conflicting details; confusing statements; reporting delays. We frequently observe conflicting data and details between sources, and even within a single report. We have not outlined the specific challenges or discrepancies for each [SitRep](https://www.afro.who.int/countries/publications?country=879), but can discuss further as needed. 
+ +Reports from government/official sources can be enriched with supplemental information retrieved from local reporting (including media) or other sources. Metadata are added at any time, as information becomes available and our time and resources permit. After making changes, the case will be recorded as modified with the date. Multiple curators look at each datapoint and any discrepancies are resolved in conversations between them. Assumptions are made that may compromise the accuracy of the data. + +Users should refer to our [data dictionary](data_dictionary.yml) for a description of each variable. Assumptions for select variables are briefly discussed below. + +**Case_status**: Only confirmed and probable cases are logged at this time. + +**Date_of_onset**: Information is only available for probable cases; we are unable to disaggregate Date_of_onset details for confirmed cases. + +**Date_confirmation**: The report date is used when a Date_confirmation is not specified by source. + +**Outcome**. Type: Death: The report date is used when a Date_death is not specified by source. If the number of deaths exceeds the number of new cases reported for that day, then deaths are logged under a previous ID with unassigned outcome and corresponding location information, when available. + +**Outcome**. Type: Recovery: When a recovery is reported, the recovery is assigned to a previous ID with unassigned outcome and corresponding location information, when available. + +**Healthcare_worker**: Healthcare worker information (e.g. location, date_confirmation, outcome) is not consistently provided by source. Supplemental sources are used, when available, to gain context that may help us to assign an ID. However, due to the limited availability of information, we have not been able to log every confirmed HCW case or outcome. + +Data are hand-curated. 
The process and methods to create, organize, and maintain data have been applied with consistency; however, we’re human and mistakes happen. As stated above, line-list data may change due to ongoing data reconciliation and validation. We welcome your contributions and feedback. Get involved! + +## Contributing + +If you would like to request changes, [open an issue](https://github.com/globaldothealth/ebola/issues/new) on this repository and we will happily consider your request. +If requesting a fix please include steps to reproduce undesirable behaviors. + +If you would like to contribute, assign an issue to yourself and/or reach out to a contributor and we will happily help you help us. + +If you want to send data to us, you can use our template at [ebola-template.csv](ebola-template.csv) which makes +it easier for us to add to our list. Just open an issue and attach a CSV / XLSX file in this repository, +or email data to info@global.health. Remove any Personally Identifiable Information. + +## License and attribution + +This repository is published under MIT License and data exports are published under the CC BY 4.0 license. + +Please cite as: "Global.health Ebola (accessed on YYYY-MM-DD)" & please add the appropriate agency, paper, and/or individual in publications and/or derivatives using these data, contact them regarding the legal use of these data, and remember to pass-forward any existing license/warranty/copyright information. diff --git a/data_dictionary.yml b/data_dictionary.yml new file mode 100644 index 0000000..2541dfb --- /dev/null +++ b/data_dictionary.yml @@ -0,0 +1,300 @@ +dictionary_last_modified: 2022-11-04 +fields: + - name: ID + type: string + description: > + Unique ID of the case + required: true + + - name: Pathogen + type: string + description: > + Constant, pre-filled value, pathogen of interest + required: true + + - name: Case_status + type: confirmed | suspected | discarded | omit_error + description: > + Status of a case. 
Cases which are discarded were previously + suspected but have now been confirmed negative, and should + be excluded from case counts. Cases which are omit_error were + incorrectly added and should be dismissed from any data + interpretation. + required: true + + - name: Location + type: string + description: Country subdivision where case was reported + examples: [Islington, Croydon] + + - name: City + type: string + description: City where case was reported + examples: [London, Madrid, Sydney, Boston] + + - name: Country + type: string + description: Country where case was reported + required: true + examples: [England, United States, Spain, Australia] + + - name: Country_ISO3 + type: string + description: ISO 3166-1 alpha-3, three letter country codes + required: true + examples: [ITA, GBR, ESP, BOL] + + - name: Age + type: integer-range + description: Age of the individual, specified as a range, either open-ended (n) or as a range delimited by a hyphen (m-n) following 5-year age increments (m-n) + + - name: Gender + type: male | female | other + description: Gender of the individual + + - name: Occupation + type: string + description: Free response entry describing the individual's occupation + + - name: Healthcare_worker + type: Y | N | NA + description: Is the individual a healthcare worker (Y=Yes, N=No, NA=Not applicable)? 
+ + - name: Symptoms + type: string + description: Comma separated list of symptoms + examples: + - rash + - vesicular rash + - skin lesions + - ulcerative lesions + - oral and genital ulcers + - fever + + - name: Date_onset + type: iso8601date + description: Date of onset of symptoms + + - name: Date_confirmation + type: iso8601date + description: Date when case was confirmed + + - name: Confirmation_method + type: string + description: Test used to confirm diagnosis + examples: ["RT-PCR"] + + - name: Previous_infection + type: Y | N | NA + description: Did the individual test positive for the infection prior to the most recent diagnosis (Y=Yes, N=No, NA=Not applicable)? + + - name: Co_infection + type: string + description: If the individual tested positive for another pathogen + + - name: Pre_existing_condition + type: string + description: If the individual has any pre-existing medical conditions + + - name: Pregnancy_status + type: Y | N | NA + description: Is the case pregnant or post-partum? + + - name: Vaccination + type: Y | N | NA + description: Has the individual received a dose of vaccine (Y=Yes, N=No, NA=Not applicable)? + + - name: Vaccine_name + type: string + description: Name of the first vaccine + + - name: Vaccine_date + type: iso8601date + Description: Date of first vaccination + + - name: Vaccine_side_effects + type: string + description: Comma separated list of symptoms experienced after receiving the vaccine (i.e. cough, sore throat, etc.) 
+ + - name: Date_of_first_consult + type: iso8601date + description: Date that the individual received first clinical consultation + + - name: Hospitalised (Y/N/NA) + type: Y | N | NA + description: Whether individual was hospitalised (Y=Yes, N=No, NA=Not applicable) + + - name: Reason for hospitalisation + type: monitoring | treatment | unknown + description: Reason why the individual was hospitalised; can list multiple, comma separated + + - name: Date_hospitalisation + type: iso8601date + description: Date individual was hospitalised + + - name: Date_discharge_hospital + type: iso8601date + description: > + Date that the individual was discharged from the hospital. Note: there is a separate field for ICU discharge. + + - name: Intensive_care (Y/N/NA) + type: Y | N | NA + description: Whether individual admitted to an intensive care unit or high dependency unit at hospital (Y=Yes, N=No, NA=Not applicable) + + - name: Date_admission_ICU + type: iso8601date + description: Date individual entered intensive care unit + + - name: Date_discharge_ICU + type: iso8601date + description: Date that the individual was discharged from the ICU + + - name: Home_monitoring + type: Y | N | NA + description: Whether individual is being remotely monitored by health officials at home without hospital admission (Y=Yes, N=No, NA=Not applicable) + + - name: Isolated (Y/N/NA) + type: Y | N | NA + description: Whether individual was isolated at home or in hospital (Y=Yes, N=No, NA=Not applicable) + + - name: Date_isolation + type: iso8601date + description: Date individual entered isolation + + - name: Outcome + type: recovered | death + description: Optional field that specifies outcome of the disease + + - name: Date_death + type: iso8601date + description: Date of death + + - name: Date_recovered + type: iso8601date + description: Date of recovery + + - name: Contact_with_case + type: Y | N | NA + description: Has the individual had contact with a confirmed/ probable/ suspected case 
(Y=Yes, N=No, NA=Not applicable)? + + - name: Contact_ID + type: integer + description: If specified, is the case ID from which this patient contracted the virus + + - name: Contact_setting + type: string + description: Setting where contact occurred that led to transmission + examples: > + Dictionary of possible entries: + HOUSE= household + WORK= workplace + SCHOOL= school/nursery + HEALTH= healthcare (including laboratory exposure) + PARTY= Sexual contact at night club/private party/sauna or similar setting + BAR= Bar/restaurant/ or other small event where there was no sexual contact + LARGE = Large event with no sexual contact (e.g., festival or sports event) + LARGECONTACT = Large event with sexual contact + OTHER + UNK= Unknown + + - name: Contact_animal + description: Whether the individual has known contact with animals + examples: > + Dictionary of possible entries: + PET = Household pets excluding rodents + PETRODENTS = Rodent pets + WILD = Wild animals excluding rodent + WILDRODENTS = Wild rodents + Other (specify) + + - name: Contact_comment + type: string + description: Free text describing any additional contact information. 
+ + - name: Transmission + type: string + description: Setting where contact occurred that led to transmission + examples: > + Dictionary of possible entries: + ANIMAL = Animal to human transmission + HAI = Healthcare-associated + LAB = Transmission in a laboratory due to occupational exposure + MTCT = Transmission from mother to child during pregnancy or at birth + OTHER = Other transmission + FOMITE = Contact with contaminated material (e.g bedding, clothing, objects) + PTP = Person-to-person (excluding: mother-to-child, healthcare-associated or sexual transmission) + SEX = Sexual transmission + TRANSFU = parenteral transmission including intravenous drug use and transfusion + UNK = Unknown + + - name: Travel_history (Y/N/NA) + type: Y | N | NA + description: Whether individual has travel history, domestic and/or international (Y=Yes, N=No, NA=Not applicable) + + - name: Travel_history_entry + type: string + description: Date when individual entered the country + + - name: Travel_history_start + type: string + description: Free text describing travel + + - name: Travel_history_location + type: string + description: Last known location where individual had travelled from + example: [Europe] + + - name: Travel_history_country + type: string + description: Last known country where individual had travelled from + example: [Spain] + + - name: Genomics_Metadata + type: string + description: Which clade the viral strain belongs to + examples: + - West African Clade + - North African Clade + + - name: Accession Number + type: string + description: Accession number of the sequence uploaded to public database + + - name: Source + type: url + description: URL of news story or government source where this case was confirmed + required: true + + - name: Source_II + type: url + description: URL of news story or government source where this case was confirmed (archived or secondary source) + + - name: Source_III + type: url + description: URL of news story or government source 
where this case was confirmed (archived or secondary source) + + - name: Source_IV + type: url + description: URL of news story or government source where this case was confirmed (archived or secondary source) + + - name: Source_V + type: url + description: URL of news story or government source where this case was confirmed (archived or secondary source) + + - name: Source_VI + type: url + description: URL of news story or government source where this case was confirmed (archived or secondary source) + + - name: Source_VII + type: url + description: URL of news story or government source where this case was confirmed (archived or secondary source) + + - name: Date_entry + type: iso8601date + description: Date case was entered into line list + required: true + + - name: Date_last_modified + type: iso8601date + description: Last date when case was modified in line list + required: true diff --git a/s3_ui/Dockerfile b/s3_ui/Dockerfile new file mode 100644 index 0000000..07068a2 --- /dev/null +++ b/s3_ui/Dockerfile @@ -0,0 +1,53 @@ +FROM python:3.10-slim as python-base + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_VERSION=1.1.14 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" \ + VENV_PATH="/opt/pysetup/.venv" + +ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" + +FROM python-base as builder-base +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + build-essential + +RUN curl -sSL https://install.python-poetry.org | python3 - + +WORKDIR $PYSETUP_PATH +COPY poetry.lock pyproject.toml ./ + +ENV PATH="${PATH}:/root/.poetry/bin" + +RUN poetry install --no-dev + +FROM python-base as development + +RUN apt-get update && apt-get upgrade -y curl \ + awscli + +WORKDIR $PYSETUP_PATH + +COPY --from=builder-base $POETRY_HOME $POETRY_HOME +COPY 
--from=builder-base $PYSETUP_PATH $PYSETUP_PATH +ENV PATH="${PATH}:/root/.poetry/bin" + +WORKDIR /app + +RUN mkdir -p templates + +COPY setup_localstack.py run.py logger.py ./ +COPY templates/* ./templates/ +COPY poetry.lock pyproject.toml ./ + +RUN poetry install --no-dev + +CMD python setup_localstack.py && python run.py diff --git a/s3_ui/Dockerfile-lambda b/s3_ui/Dockerfile-lambda new file mode 100644 index 0000000..996c4c9 --- /dev/null +++ b/s3_ui/Dockerfile-lambda @@ -0,0 +1,9 @@ +# 3.10 not yet available +FROM public.ecr.aws/lambda/python:3.9 +COPY . ${LAMBDA_TASK_ROOT} +COPY requirements.txt . + +RUN /var/lang/bin/python3.9 -m pip install --upgrade pip +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +CMD ["run.handler"] diff --git a/s3_ui/Dockerfile-test b/s3_ui/Dockerfile-test new file mode 100644 index 0000000..5cb4b98 --- /dev/null +++ b/s3_ui/Dockerfile-test @@ -0,0 +1,51 @@ +FROM python:3.10-slim as python-base + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_VERSION=1.1.14 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" \ + VENV_PATH="/opt/pysetup/.venv" + +ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" + +FROM python-base as builder-base +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + build-essential + +RUN curl -sSL https://install.python-poetry.org | python3 - + +WORKDIR $PYSETUP_PATH +COPY poetry.lock pyproject.toml ./ + +ENV PATH="${PATH}:/root/.poetry/bin" + +RUN poetry install + +FROM python-base as development + +RUN apt-get update && apt-get upgrade -y curl \ + awscli + +WORKDIR $PYSETUP_PATH + +COPY --from=builder-base $POETRY_HOME $POETRY_HOME +COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH +ENV PATH="${PATH}:/root/.poetry/bin" + +WORKDIR /app + +COPY setup_localstack.py logger.py run.py 
test_data.py ./ +COPY templates/* ./templates/ +COPY poetry.lock pyproject.toml ./ + +RUN poetry install + +CMD python setup_localstack.py && python -m pytest -rs -v . diff --git a/s3_ui/README.md b/s3_ui/README.md new file mode 100644 index 0000000..a0d7fe9 --- /dev/null +++ b/s3_ui/README.md @@ -0,0 +1,26 @@ +# Ebola Data Navigator + +## What it does + +This folder contains the code needed to run a web service that exposes archived Ebola files. + +Users can use their web browsers to navigate and download files by using templated links. +The server finds files in desired folders and exposes on-demand presigned URLs to S3 objects. + +## How to run + +Developers can run the application via `./run.py`, building from the `Dockerfile` and running the created container, and running `run_stack.sh`. + +Running using `run_stack.sh` also creates and uses a mock AWS S3 service and adds fake data +(csv and json files). + +Developers can test the application via `./test.py`, building from the `Dockerfile` and running the created image, and running `test_stack.sh`. + +## How to deploy + +This service runs on AWS Lambda in a Docker container. +Deployment consists of building an image, deploying it to ECR, and restarting the Lambda function. + +To build the image, run `poetry export -f requirements.txt --output requirements.txt --without-hashes`, then `docker build -f Dockerfile-lambda -t lambda_s3 .`. +To push it to ECR, follow the [instructions given by AWS](https://docs.aws.amazon.com/AmazonECR/latest/userguide/docker-push-ecr-image.html). +To restart the Lambda function, use boto3 or the AWS web UI to deploy the new image to the function. 
diff --git a/s3_ui/docker-compose-test.yml b/s3_ui/docker-compose-test.yml new file mode 100644 index 0000000..ff36e29 --- /dev/null +++ b/s3_ui/docker-compose-test.yml @@ -0,0 +1,25 @@ +services: + test: + build: + context: ./ + dockerfile: "Dockerfile-test" + environment: + LOCALSTACK_URL: "http://localstack:4566" + S3_BUCKET: "fake" + AWS_ACCESS_KEY_ID: "fake" + AWS_SECRET_ACCESS_KEY: "fake" + localstack: + image: localstack/localstack + environment: + AWS_ACCESS_KEY_ID: "fake" + AWS_SECRET_ACCESS_KEY: "fake" + SERVICES: "s3" + DOCKER_HOST: "unix:///var/run/docker.sock" + START_WEB: 0 # https://github.com/localstack/localstack/issues/1466#issuecomment-599822542 + HOSTNAME: "localhost" + EAGER_SERVICE_LOADING: 1 + volumes: + - "/var/run/docker.sock:/var/run/docker.sock" + ports: + - "4566:4566" # LocalStack Gateway + - "4510-4559:4510-4559" # external services port range diff --git a/s3_ui/docker-compose.yml b/s3_ui/docker-compose.yml new file mode 100644 index 0000000..9c3e9e4 --- /dev/null +++ b/s3_ui/docker-compose.yml @@ -0,0 +1,27 @@ +services: + app: + build: + context: ./ + dockerfile: "Dockerfile" + ports: + - "5000:5000" # Flask + environment: + LOCALSTACK_URL: "http://localstack:4566" + S3_BUCKET: "fake" + AWS_ACCESS_KEY_ID: "fake" + AWS_SECRET_ACCESS_KEY: "fake" + localstack: + image: localstack/localstack + environment: + AWS_ACCESS_KEY_ID: "fake" + AWS_SECRET_ACCESS_KEY: "fake" + SERVICES: "s3" + DOCKER_HOST: "unix:///var/run/docker.sock" + START_WEB: 0 # https://github.com/localstack/localstack/issues/1466#issuecomment-599822542 + HOSTNAME: "localhost" + EAGER_SERVICE_LOADING: 1 + volumes: + - "/var/run/docker.sock:/var/run/docker.sock" + ports: + - "4566:4566" # LocalStack Gateway + - "4510-4559:4510-4559" # external services port range diff --git a/s3_ui/logger.py b/s3_ui/logger.py new file mode 100644 index 0000000..be0702f --- /dev/null +++ b/s3_ui/logger.py @@ -0,0 +1,9 @@ +import logging +import sys + + +def setup_logger(): + h = 
logging.StreamHandler(sys.stdout) + rootLogger = logging.getLogger() + rootLogger.addHandler(h) + rootLogger.setLevel(logging.DEBUG) diff --git a/s3_ui/poetry.lock b/s3_ui/poetry.lock new file mode 100644 index 0000000..ed9b887 --- /dev/null +++ b/s3_ui/poetry.lock @@ -0,0 +1,494 @@ +[[package]] +name = "atomicwrites" +version = "1.4.1" +description = "Atomic file writes." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "attrs" +version = "22.1.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] + +[[package]] +name = "boto3" +version = "1.24.39" +description = "The AWS SDK for Python" +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.27.39,<1.28.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.27.39" +description = "Low-level, data-driven core of boto 3." 
+category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.13.8)"] + +[[package]] +name = "certifi" +version = "2022.6.15" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "2.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.5" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "faker" +version = "13.15.1" +description = "Faker is a Python package that generates fake data for you." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +python-dateutil = ">=2.4" + +[[package]] +name = "flask" +version = "2.1.3" +description = "A simple framework for building complex web applications." 
+category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = ">=8.0" +itsdangerous = ">=2.0" +Jinja2 = ">=3.0" +Werkzeug = ">=2.0" + +[package.extras] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + +[[package]] +name = "idna" +version = "3.3" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "itsdangerous" +version = "2.1.2" +description = "Safely pass data to untrusted environments and back." +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "markupsafe" +version = "2.1.1" +description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "dev" +optional = false +python-versions = ">=3.6.8" + +[package.extras] +diagrams = ["railroad-diagrams", "jinja2"] + +[[package]] +name = "pytest" +version = "7.1.2" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +py = ">=1.8.2" +tomli = ">=1.0.0" + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "requests" +version = "2.28.1" +description 
= "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "serverless-wsgi" +version = "3.0.0" +description = "Amazon AWS API Gateway WSGI wrapper" +category = "main" +optional = false +python-versions = ">3.6" + +[package.dependencies] +werkzeug = ">2" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "urllib3" +version = "1.26.11" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "werkzeug" +version = "2.2.1" +description = "The comprehensive WSGI web application library." 
+category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.10" +content-hash = "06cd469827bca55f7c780c628edb579772bfd9d5c08e9aee7f7ca76110b36e14" + +[metadata.files] +atomicwrites = [ + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +] +attrs = [ + {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, + {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, +] +boto3 = [ + {file = "boto3-1.24.39-py3-none-any.whl", hash = "sha256:057b4132e81f5ba63dc7d9d989023f3e81216999d76edabffade5bad5ae9bd3e"}, + {file = "boto3-1.24.39.tar.gz", hash = "sha256:c303fa76dde0422476ed2413fde8e34f539fede087910ad1f911fee6546a4be3"}, +] +botocore = [ + {file = "botocore-1.27.39-py3-none-any.whl", hash = "sha256:b5eb8731542e7e987465be80ce206d72de6774e16d4e808e636d92c467779d7e"}, + {file = "botocore-1.27.39.tar.gz", hash = "sha256:12d9f5866da51738201d00f6df76571e2f46233d8effb5cdcfe871616bff6197"}, +] +certifi = [ + {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, + {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, +] +charset-normalizer = [ + {file = "charset-normalizer-2.1.0.tar.gz", hash = "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"}, + {file = "charset_normalizer-2.1.0-py3-none-any.whl", hash = "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5"}, +] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = 
"sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, +] +faker = [ + {file = "Faker-13.15.1-py3-none-any.whl", hash = "sha256:172e45220b7a46743f4fb58cf380adb306d5c3ab1c0b0d97062508474cec5ff8"}, + {file = "Faker-13.15.1.tar.gz", hash = "sha256:7c3f8ee807d3916415568169a172bf0893ea9cc3371ab55e4e5f5170d2185bea"}, +] +flask = [ + {file = "Flask-2.1.3-py3-none-any.whl", hash = "sha256:9013281a7402ad527f8fd56375164f3aa021ecfaff89bfe3825346c24f87e04c"}, + {file = "Flask-2.1.3.tar.gz", hash = "sha256:15972e5017df0575c3d6c090ba168b6db90259e620ac8d7ea813a396bad5b6cb"}, +] +idna = [ + {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, + {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] +itsdangerous = [ + {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, + {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, +] +jinja2 = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] +jmespath = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = 
"sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] +markupsafe = [ + {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, + {file = 
"MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, + {file = 
"MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, + {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, +] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +py = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] +pyparsing = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pytest = [ + {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, + {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = 
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +requests = [ + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, +] +s3transfer = [ + {file = "s3transfer-0.6.0-py3-none-any.whl", hash = "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd"}, + {file = "s3transfer-0.6.0.tar.gz", hash = "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"}, +] +serverless-wsgi = [ + {file = "serverless-wsgi-3.0.0.tar.gz", hash = "sha256:e5d699dfefc3e593c11a07b9af2d89cdccd3110d9a1ee28c83a9fcf301c6fcbc"}, + {file = "serverless_wsgi-3.0.0-py2.py3-none-any.whl", hash = "sha256:2bb42fbb23eb8f0ccfc5666c7b6fc6af36f9948336dbb99d033416447e5a1d4c"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +urllib3 = [ + {file = "urllib3-1.26.11-py2.py3-none-any.whl", hash = "sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc"}, + {file = "urllib3-1.26.11.tar.gz", hash = "sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a"}, +] +werkzeug = [ + {file = "Werkzeug-2.2.1-py3-none-any.whl", hash = "sha256:7e1db6a5ba6b9a8be061e47e900456355b8714c0f238b0313f53afce1a55a79a"}, + {file = "Werkzeug-2.2.1.tar.gz", hash = 
"sha256:4d7013ef96fd197d1cdeb03e066c6c5a491ccb44758a5b2b91137319383e5a5a"}, +] diff --git a/s3_ui/pyproject.toml b/s3_ui/pyproject.toml new file mode 100644 index 0000000..adbc649 --- /dev/null +++ b/s3_ui/pyproject.toml @@ -0,0 +1,20 @@ +[tool.poetry] +name = "ebola-data" +version = "0.1.0" +description = "" +authors = ["Global.health team "] + +[tool.poetry.dependencies] +python = "^3.9" +requests = "^2.28.1" +boto3 = "^1.24.39" +Faker = "^13.15.1" +Flask = "^2.1.3" +serverless-wsgi = "^3.0.0" + +[tool.poetry.dev-dependencies] +pytest = "^7.1.2" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/s3_ui/requirements.txt b/s3_ui/requirements.txt new file mode 100644 index 0000000..61644b2 --- /dev/null +++ b/s3_ui/requirements.txt @@ -0,0 +1,20 @@ +boto3==1.24.39; python_version >= "3.7" +botocore==1.27.39; python_version >= "3.7" +certifi==2022.6.15; python_version >= "3.7" and python_version < "4" +charset-normalizer==2.1.0; python_version >= "3.7" and python_version < "4" and python_full_version >= "3.6.0" +click==8.1.3; python_version >= "3.7" +colorama==0.4.5; python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0" +faker==13.15.1; python_version >= "3.6" +flask==2.1.3; python_version >= "3.7" +idna==3.3; python_version >= "3.7" and python_version < "4" +itsdangerous==2.1.2; python_version >= "3.7" +jinja2==3.1.2; python_version >= "3.7" +jmespath==1.0.1; python_version >= "3.7" +markupsafe==2.1.1; python_version >= "3.7" +python-dateutil==2.8.2; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.7" +requests==2.28.1; python_version >= "3.7" and python_version < "4" +s3transfer==0.6.0; python_version >= "3.7" +serverless-wsgi==3.0.0; python_version > "3.6" +six==1.16.0; python_version >= "3.7" and 
python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.7" +urllib3==1.26.11; python_version >= "3.7" and python_full_version < "3.0.0" and python_version < "4" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.7" +werkzeug==2.2.1; python_version >= "3.7" diff --git a/s3_ui/run.py b/s3_ui/run.py new file mode 100644 index 0000000..fb7b88c --- /dev/null +++ b/s3_ui/run.py @@ -0,0 +1,88 @@ +import logging +import os + +import boto3 +from flask import Flask, render_template, redirect, request +import serverless_wsgi + +from logger import setup_logger + + +LOCALSTACK_URL = os.environ.get("LOCALSTACK_URL") +S3_BUCKET = os.environ.get("S3_BUCKET") + +ARCHIVES = "archive" + +FOLDERS = [ARCHIVES] + +LATEST_FILES = ["latest.csv"] + +FLASK_HOST = os.environ.get("FLASK_HOST", "0.0.0.0") +FLASK_PORT = os.environ.get("FLASK_PORT", 5000) +FLASK_DEBUG = os.environ.get("FLASK_DEBUG", False) + +APP = Flask(__name__) + + +@APP.route("/") +def home(): + return render_template("index.html", files=LATEST_FILES) + + +@APP.route(f"/{ARCHIVES}") +def get_archive_files(): + try: + files = [f.split("/")[1] for f in list_bucket_contents(ARCHIVES)] + logging.debug(f"Files in {ARCHIVES} folder: {files}") + return render_template("folder.html", folder=ARCHIVES, files=files) + except Exception as exc: + return f"Exception: {exc}" + + +def list_bucket_contents(folder: str) -> list[str]: + logging.debug(f"Listing bucket contents for folder {folder}") + client = create_s3_client() + response = client.list_objects(Bucket=S3_BUCKET, + Prefix=f"{folder}/", + Delimiter="/" + ) + contents = [] + for obj in response.get("Contents", []): + contents.append(obj.get("Key")) + + logging.debug(f"Listed objects for prefix {folder}: {contents}") + return contents + + +@APP.route("/url") +def get_presigned_url(): + args = request.args + folder = args.get('folder', '') + file_name = args.get('file_name', '') + target = "" + if not folder: + target 
= file_name + else: + target = f"{folder}/{file_name}" + logging.debug(f"Creating presigned URL for {target}") + client = create_s3_client() + params = {"Bucket": S3_BUCKET, "Key": f"{target}"} + return redirect(client.generate_presigned_url("get_object", Params=params, ExpiresIn=60)) + + +def create_s3_client() -> object: + if LOCALSTACK_URL: + logging.debug(f"Creating an S3 client using Localstack at {LOCALSTACK_URL}") + return boto3.client("s3", endpoint_url=LOCALSTACK_URL) + logging.debug("Creating an S3 client using AWS") + return boto3.client("s3") + + +def handler(event, context): + return serverless_wsgi.handle_request(APP, event, context) + + +if __name__ == "__main__": + setup_logger() + logging.info("Starting Flask...") + APP.run(FLASK_HOST, FLASK_PORT, debug=FLASK_DEBUG) diff --git a/s3_ui/run_stack.sh b/s3_ui/run_stack.sh new file mode 100755 index 0000000..e198ec6 --- /dev/null +++ b/s3_ui/run_stack.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker compose -f docker-compose.yml up --build --force-recreate --renew-anon-volumes diff --git a/s3_ui/setup_localstack.py b/s3_ui/setup_localstack.py new file mode 100644 index 0000000..88cccda --- /dev/null +++ b/s3_ui/setup_localstack.py @@ -0,0 +1,78 @@ +import csv +from decimal import Decimal +from datetime import date +import logging +import os +from time import sleep + +import boto3 +from faker import Faker +import requests + +from logger import setup_logger +from run import FOLDERS + + +LOCALSTACK_URL = os.environ.get("LOCALSTACK_URL", "http://localstack:4566") +S3_BUCKET = os.environ.get("S3_BUCKET", "monkeypox") +S3_CLIENT = boto3.client("s3", endpoint_url=LOCALSTACK_URL) + +FAKE = Faker() + + +def wait_for_localstack(): + logging.info("Waiting for localstack") + healthcheck_url = "".join([LOCALSTACK_URL, "/health"]) + counter = 0 + while counter < 42: + try: + response = requests.get(healthcheck_url) + s3_status = response.json().get("services", {}).get("s3") + if s3_status == "running": + return + except 
requests.exceptions.ConnectionError: + pass + counter += 1 + sleep(5) + raise Exception("Localstack not available") + + +def create_bucket(bucket_name: str) -> None: + logging.info(f"Creating bucket {bucket_name}") + S3_CLIENT.create_bucket(Bucket=bucket_name) + + +def create_fake_data() -> list[dict]: + logging.info("Creating fake data") + return [FAKE.profile() for _ in range(0, 42)] + + +def create_fake_file(file_name: str=FAKE.file_name(), data: list[dict]=[]) -> str: + print(f"File name: {file_name}") + fn_w_ext = f"{file_name}.csv" + logging.info(f"Using fake data to create file {fn_w_ext}") + with open(fn_w_ext, "w") as fh: + fields = list(data[0].keys()) + writer = csv.DictWriter(fh, fieldnames=fields) + writer.writeheader() + for row in data: + writer.writerow(row) + return fn_w_ext + + +def upload_file(folder: str, file_name: str) -> None: + logging.info(f"Uploading file {file_name} to folder {S3_BUCKET}/{folder}") + S3_CLIENT.upload_file(file_name, S3_BUCKET, f"{folder}/{file_name}") + + +if __name__ == "__main__": + setup_logger() + logging.info("Starting script") + wait_for_localstack() + create_bucket(S3_BUCKET) + for folder in FOLDERS: + print(f"Folder in setup: {folder}") + for _ in range(0, 3): + data = create_fake_data() + file_name = create_fake_file(data=data) + upload_file(folder, file_name) diff --git a/s3_ui/templates/folder.html b/s3_ui/templates/folder.html new file mode 100644 index 0000000..bad2f2f --- /dev/null +++ b/s3_ui/templates/folder.html @@ -0,0 +1,21 @@ + + + + + Ebola Data Files + + +
+

Contents of {{ folder }}

+
+ {% for f in files %} +
+ {{ f }} +
+ {% endfor %} +
+
+ Home +
+ + diff --git a/s3_ui/templates/index.html b/s3_ui/templates/index.html new file mode 100644 index 0000000..c8bf532 --- /dev/null +++ b/s3_ui/templates/index.html @@ -0,0 +1,18 @@ + + + + + Ebola Data Files + + +
+

Contents of G.h Ebola data bucket

+
+
Line List Archives
+ {% for f in files %} +
+ {{ f }} +
+ {% endfor %} + + diff --git a/s3_ui/test_data.py b/s3_ui/test_data.py new file mode 100644 index 0000000..dc6bf81 --- /dev/null +++ b/s3_ui/test_data.py @@ -0,0 +1,83 @@ +import os +import random +import re +from urllib.parse import urlparse + +from flask import url_for +import pytest + +from run import (APP, list_bucket_contents, LATEST_FILES, FOLDERS) + + +LOCALSTACK_URL = os.environ.get("LOCALSTACK_URL") +S3_BUCKET = os.environ.get("S3_BUCKET") + + +@pytest.fixture() +def flask_app(): + APP.config.update({ + "TESTING": True, + }) + yield APP + + +@pytest.fixture() +def client(flask_app): + with APP.app_context(): + with APP.test_client() as client: + yield client + +@pytest.fixture() +def runner(flask_app): + return flask_app.test_cli_runner() + + +def test_folders_displayed(client): + response = client.get("/") + assert "Line List Archives" in response.text + for file_name in LATEST_FILES: + assert f"{file_name}" in response.text + + +@pytest.mark.parametrize("endpoint", FOLDERS) +def test_folders_contain_files(client, endpoint): + response = client.get(f"/{endpoint}") + assert "csv" in response.text + + +@pytest.mark.skipif(not (S3_BUCKET or LOCALSTACK_URL), reason="Target S3 bucket must be set") +@pytest.mark.parametrize("folder", FOLDERS) +def test_files_downloadable(client, folder): + print(f"Folder in test: {folder}") + file_name = random.choice([f.split("/")[1] for f in list_bucket_contents(folder)]) + response = client.get(f"/url?folder={folder}&file_name={file_name}") + + assert response.status_code == 302 + + redirect = "" + if match := re.search(r"href=[\"']?([^\"' >]+)", response.text): + redirect = match.group(1) + else: + pytest.fail("The web page should show a hyperlink") + + try: + _ = urlparse(redirect) + except: + pytest.fail("The service should template query params its endpoint") + assert f"{folder}/{file_name}" in redirect + endpoint = url_for("get_presigned_url", folder=folder, file_name=file_name) + response = client.get(endpoint) + + 
assert response.status_code == 302 + + presigned = "" + + if match := re.search(r"href=[\"']?([^\"' >]+)", response.text): + presigned = match.group(1) + else: + pytest.fail("The endpoint should return a presigned URL") + + assert redirect == presigned, f"URLs do not match: expected {redirect}, got {presigned}" + + for file_name in LATEST_FILES: + response = client.get(f"/url?file_name={file_name}") diff --git a/s3_ui/test_stack.sh b/s3_ui/test_stack.sh new file mode 100755 index 0000000..7c06704 --- /dev/null +++ b/s3_ui/test_stack.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker compose -f docker-compose-test.yml up --build --force-recreate --remove-orphans --renew-anon-volumes --exit-code-from test diff --git a/scripts/Dockerfile b/scripts/Dockerfile new file mode 100644 index 0000000..dfdb2a4 --- /dev/null +++ b/scripts/Dockerfile @@ -0,0 +1,58 @@ +FROM python:3.10-slim as python-base + +ARG SCRIPT_DIR + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_VERSION=1.1.14 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" \ + VENV_PATH="/opt/pysetup/.venv" + +ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" + +FROM python-base as builder-base +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + build-essential + +RUN curl -sSL https://install.python-poetry.org | python3 - + +WORKDIR $PYSETUP_PATH +COPY ${SCRIPT_DIR}/poetry.lock ${SCRIPT_DIR}/pyproject.toml ./ + +ENV PATH="${PATH}:/root/.poetry/bin" + +RUN poetry install --no-dev + +FROM python-base as development + +RUN apt-get update && apt-get upgrade -y curl \ + awscli + +WORKDIR $PYSETUP_PATH + +COPY --from=builder-base $POETRY_HOME $POETRY_HOME +COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH +ENV PATH="${PATH}:/root/.poetry/bin" + +ARG NOTIFY_WEBHOOK_URL +ENV NOTIFY_WEBHOOK_URL=${NOTIFY_WEBHOOK_URL} +ENV 
EPID_INGESTION_ENV "XXXXXXXXXXX" +ENV EPID_INGESTION_SOURCE_ID "XXXXXXXXXXX" + +WORKDIR /app + +RUN mkdir -p templates + +COPY ${SCRIPT_DIR}/* ./ + +RUN poetry install --no-dev + +CMD ./run.sh \ No newline at end of file diff --git a/scripts/Dockerfile-test b/scripts/Dockerfile-test new file mode 100644 index 0000000..d10a6f3 --- /dev/null +++ b/scripts/Dockerfile-test @@ -0,0 +1,33 @@ +FROM python:3.10-slim + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_VERSION=1.1.14 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" \ + VENV_PATH="/opt/pysetup/.venv" + +ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl + +RUN curl -sSL https://install.python-poetry.org | python3 - + +ENV PATH="${PATH}:/root/.poetry/bin" + +RUN mkdir -p script + +WORKDIR script + +COPY ./* ./ + +RUN poetry install + +CMD ./test.sh diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..c4c0dde --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,30 @@ +# Ebola analytics scripts + +This folder contains analytics and helper scripts, configuration for their runtime and testing environments. + +The analysis performed by the scripts in this folder includes: +* Ingestion of data from GSheets to S3 and MongoDB + +## Use + +To start the stack, run `run_stack.sh`. +This runs the scripts, using mocks of AWS and Slack to receive their outputs. + +## Testing + +To test the stack, run `test_stack.sh`. +This runs the scripts, using mocks of AWS and Slack to receive their outputs, followed by a set of assertions about behavior (e.g. the AWS mock should contain data, the Slack mock should contain messages). + +## Deployment + +Scripts can run inside containers on AWS Batch with Fargate. 
+
+To build the image, run `docker build -f ./../Dockerfile -t <image-name> .`.
+To push it to ECR, run:
+```
+docker tag <image-name> <aws_account_id>.dkr.ecr.eu-central-1.amazonaws.com/<repository>:<tag>
+aws ecr get-login-password --region eu-central-1 | docker login --username AWS --password-stdin <aws_account_id>.dkr.ecr.eu-central-1.amazonaws.com
+docker push <aws_account_id>.dkr.ecr.eu-central-1.amazonaws.com/<repository>:<tag>
+```
+
+For more information, see the [AWS docs](https://docs.aws.amazon.com/AmazonECR/latest/userguide/docker-push-ecr-image.html).
diff --git a/scripts/ingestion/docker-compose-test.yml b/scripts/ingestion/docker-compose-test.yml
new file mode 100644
index 0000000..711cccc
--- /dev/null
+++ b/scripts/ingestion/docker-compose-test.yml
@@ -0,0 +1,37 @@
+services:
+  test:
+    build:
+      context: ./
+      dockerfile: ./../Dockerfile-test
+      args:
+        SCRIPT_DIR: "ingestion"
+    environment:
+      DOCKERIZED: "absolutely"
+      LOCALSTACK_URL: "http://localstack:4566"
+      S3_BUCKET: "fake"
+      DB_CONNECTION: "mongodb://mongo:27017"
+      DATABASE_NAME: "ebola"
+      GH_COLLECTION: "gh"
+      AWS_ACCESS_KEY_ID: "fake"
+      AWS_SECRET_ACCESS_KEY: "fake"
+  localstack:
+    image: localstack/localstack
+    environment:
+      AWS_ACCESS_KEY_ID: "fake"
+      AWS_SECRET_ACCESS_KEY: "fake"
+      SERVICES: "s3"
+      DOCKER_HOST: "unix:///var/run/docker.sock"
+    volumes:
+      - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack"
+      - "/var/run/docker.sock:/var/run/docker.sock"
+    ports:
+      - "4566:4566"            # LocalStack Gateway
+      - "4510-4559:4510-4559"  # external services port range
+  mongo:
+    image: mongo:5.0.12
+    restart: always
+    init: true
+    ports:
+      - "27017:27017"
+    environment:
+      MONGO_INITDB_DATABASE: "ebola"
\ No newline at end of file
diff --git a/scripts/ingestion/docker-compose.yml b/scripts/ingestion/docker-compose.yml
new file mode 100644
index 0000000..4f5a81b
--- /dev/null
+++ b/scripts/ingestion/docker-compose.yml
@@ -0,0 +1,37 @@
+services:
+  app:
+    build:
+      context: ./
+      dockerfile: ./../Dockerfile
+      args:
+        SCRIPT_DIR: "ingestion"
+    environment:
+      DOCKERIZED: "absolutely"
+      LOCALSTACK_URL: 
"http://localstack:4566" + S3_BUCKET: "fake" + DB_CONNECTION: "mongodb://mongo:27017" + DATABASE_NAME: "ebola" + GH_COLLECTION: "gh" + AWS_ACCESS_KEY_ID: "fake" + AWS_SECRET_ACCESS_KEY: "fake" + localstack: + image: localstack/localstack + environment: + AWS_ACCESS_KEY_ID: "fake" + AWS_SECRET_ACCESS_KEY: "fake" + SERVICES: "s3" + DOCKER_HOST: "unix:///var/run/docker.sock" + volumes: + - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack" + - "/var/run/docker.sock:/var/run/docker.sock" + ports: + - "4566:4566" # LocalStack Gateway + - "4510-4559:4510-4559" # external services port range + mongo: + image: mongo:5.0.12 + restart: always + init: true + ports: + - "27017:27017" + environment: + MONGO_INITDB_DATABASE: "ebola" diff --git a/scripts/ingestion/poetry.lock b/scripts/ingestion/poetry.lock new file mode 100644 index 0000000..c51112c --- /dev/null +++ b/scripts/ingestion/poetry.lock @@ -0,0 +1,737 @@ +[[package]] +name = "attrs" +version = "22.1.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] + +[[package]] +name = "boto3" +version = "1.25.1" +description = "The AWS SDK for Python" +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.28.1,<1.29.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] 
+crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.28.1" +description = "Low-level, data-driven core of boto 3." +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.14.0)"] + +[[package]] +name = "cachetools" +version = "5.2.0" +description = "Extensible memoizing collections and decorators" +category = "main" +optional = false +python-versions = "~=3.7" + +[[package]] +name = "certifi" +version = "2022.9.24" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "2.1.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "dnspython" +version = "2.2.1" +description = "DNS toolkit" +category = "main" +optional = false +python-versions = ">=3.6,<4.0" + +[package.extras] +dnssec = ["cryptography (>=2.6,<37.0)"] +curio = ["curio (>=1.2,<2.0)", "sniffio (>=1.1,<2.0)"] +doh = ["h2 (>=4.1.0)", "httpx (>=0.21.1)", "requests (>=2.23.0,<3.0.0)", "requests-toolbelt (>=0.9.1,<0.10.0)"] +idna = ["idna (>=2.1,<4.0)"] +trio = ["trio (>=0.14,<0.20)"] +wmi = ["wmi (>=1.5.1,<2.0.0)"] + +[[package]] +name = "exceptiongroup" +version = "1.0.0rc9" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "google-api-core" +version = "2.10.2" +description = "Google API client core library" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +google-auth = ">=1.25.0,<3.0dev" +googleapis-common-protos = ">=1.56.2,<2.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] + +[[package]] +name = "google-api-python-client" +version = "2.65.0" +description = "Google API Client Library for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.19.0,<3.0.0dev" +google-auth-httplib2 = ">=0.1.0" +httplib2 = ">=0.15.0,<1dev" +uritemplate = ">=3.0.1,<5" + +[[package]] +name = "google-auth" +version = "2.13.0" +description = "Google Authentication 
Library" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} +six = ">=1.9.0" + +[package.extras] +aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"] +enterprise_cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] + +[[package]] +name = "google-auth-httplib2" +version = "0.1.0" +description = "Google Authentication Library: httplib2 transport" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +google-auth = "*" +httplib2 = ">=0.15.0" +six = "*" + +[[package]] +name = "google-auth-oauthlib" +version = "0.7.0" +description = "Google Authentication Library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +google-auth = ">=2.13.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + +[[package]] +name = "googleapis-common-protos" +version = "1.56.4" +description = "Common protobufs used in Google APIs" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +protobuf = ">=3.15.0,<5.0.0dev" + +[package.extras] +grpc = ["grpcio (>=1.0.0,<2.0.0dev)"] + +[[package]] +name = "httplib2" +version = "0.20.4" +description = "A comprehensive HTTP client library." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "protobuf" +version = "4.21.8" +description = "" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyasn1-modules" +version 
= "0.2.8" +description = "A collection of ASN.1-based protocols modules." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.5.0" + +[[package]] +name = "pygsheets" +version = "2.0.5" +description = "Google Spreadsheets Python API v4" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +google-api-python-client = ">=1.5.5" +google-auth-oauthlib = "*" + +[package.extras] +pandas = ["pandas (>=0.14.0)"] + +[[package]] +name = "pymongo" +version = "4.3.2" +description = "Python driver for MongoDB " +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (<2.0.0)"] +encryption = ["pymongocrypt (>=1.3.0,<2.0.0)"] +gssapi = ["pykerberos"] +ocsp = ["pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)", "certifi"] +snappy = ["python-snappy"] +zstd = ["zstandard"] + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "main" +optional = false +python-versions = ">=3.6.8" + +[package.extras] +diagrams = ["railroad-diagrams", "jinja2"] + +[[package]] +name = "pytest" +version = "7.2.0" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" 
+description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-oauthlib" +version = "1.3.1" +description = "OAuthlib authentication support for Requests." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +category = "main" +optional = false +python-versions = ">=3.6,<4" + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = 
"urllib3" +version = "1.26.12" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.10" +content-hash = "9fb502dad38cf4a4cd5f4ea833696412e1680dcbf281e8149bdf7e7dad591ed0" + +[metadata.files] +attrs = [ + {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, + {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, +] +boto3 = [ + {file = "boto3-1.25.1-py3-none-any.whl", hash = "sha256:5684030fb1fa742c9bec33bee1a0829ff4b4bb2cdef40d9465969fdb55b501bc"}, + {file = "boto3-1.25.1.tar.gz", hash = "sha256:9517b1d517b024a259a116a0206ae4a471e2ffab57db1b41a3ce6e3f8042001a"}, +] +botocore = [ + {file = "botocore-1.28.1-py3-none-any.whl", hash = "sha256:e751045bee771d99d1baa06775df38511a5025cab6ceb2219a2a27cc2abd3ee5"}, + {file = "botocore-1.28.1.tar.gz", hash = "sha256:2ebaf48c9cd61ad5532ac639569837bce3e0470991c5f1bee9fe3ef7d0362c42"}, +] +cachetools = [ + {file = "cachetools-5.2.0-py3-none-any.whl", hash = "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db"}, + {file = "cachetools-5.2.0.tar.gz", hash = "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757"}, +] +certifi = [ + {file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"}, + {file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"}, +] 
+charset-normalizer = [ + {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, + {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, +] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +dnspython = [ + {file = "dnspython-2.2.1-py3-none-any.whl", hash = "sha256:a851e51367fb93e9e1361732c1d60dab63eff98712e503ea7d92e6eccb109b4f"}, + {file = "dnspython-2.2.1.tar.gz", hash = "sha256:0f7569a4a6ff151958b64304071d370daa3243d15941a7beedf0c9fe5105603e"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.0.0rc9-py3-none-any.whl", hash = "sha256:2e3c3fc1538a094aab74fad52d6c33fc94de3dfee3ee01f187c0e0c72aec5337"}, + {file = "exceptiongroup-1.0.0rc9.tar.gz", hash = "sha256:9086a4a21ef9b31c72181c77c040a074ba0889ee56a7b289ff0afb0d97655f96"}, +] +google-api-core = [ + {file = "google-api-core-2.10.2.tar.gz", hash = "sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320"}, + {file = "google_api_core-2.10.2-py3-none-any.whl", hash = "sha256:34f24bd1d5f72a8c4519773d99ca6bf080a6c4e041b4e9f024fe230191dda62e"}, +] +google-api-python-client = [ + {file = "google-api-python-client-2.65.0.tar.gz", hash = "sha256:b8a0ca8454ad57bc65199044717d3d214197ae1e2d666426bbcd4021b36762e0"}, + {file = "google_api_python_client-2.65.0-py2.py3-none-any.whl", hash = "sha256:2c6611530308b3f931dcf1360713aa3a20cf465d0bf2bac65f2ec99e8c9860de"}, +] +google-auth = [ + {file = "google-auth-2.13.0.tar.gz", hash = "sha256:9352dd6394093169157e6971526bab9a2799244d68a94a4a609f0dd751ef6f5e"}, + {file = "google_auth-2.13.0-py2.py3-none-any.whl", hash = "sha256:99510e664155f1a3c0396a076b5deb6367c52ea04d280152c85ac7f51f50eb42"}, +] 
+google-auth-httplib2 = [ + {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, + {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, +] +google-auth-oauthlib = [ + {file = "google-auth-oauthlib-0.7.0.tar.gz", hash = "sha256:db11bce4b3effc99b518ec22a2903470e0853c0c92be57694e3684e738d22513"}, + {file = "google_auth_oauthlib-0.7.0-py2.py3-none-any.whl", hash = "sha256:53019edbde83e08ff0740eefc5bded7e26a289941d12e7ae1f0f5bacf2faa031"}, +] +googleapis-common-protos = [ + {file = "googleapis-common-protos-1.56.4.tar.gz", hash = "sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417"}, + {file = "googleapis_common_protos-1.56.4-py2.py3-none-any.whl", hash = "sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394"}, +] +httplib2 = [ + {file = "httplib2-0.20.4-py3-none-any.whl", hash = "sha256:8b6a905cb1c79eefd03f8669fd993c36dc341f7c558f056cb5a33b5c2f458543"}, + {file = "httplib2-0.20.4.tar.gz", hash = "sha256:58a98e45b4b1a48273073f905d2961666ecf0fbac4250ea5b47aef259eb5c585"}, +] +idna = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] +jmespath = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] +oauthlib = [ + {file = 
"oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +protobuf = [ + {file = "protobuf-4.21.8-cp310-abi3-win32.whl", hash = "sha256:c252c55ee15175aa1b21b7b9896e6add5162d066d5202e75c39f96136f08cce3"}, + {file = "protobuf-4.21.8-cp310-abi3-win_amd64.whl", hash = "sha256:809ca0b225d3df42655a12f311dd0f4148a943c51f1ad63c38343e457492b689"}, + {file = "protobuf-4.21.8-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bbececaf3cfea9ea65ebb7974e6242d310d2a7772a6f015477e0d79993af4511"}, + {file = "protobuf-4.21.8-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:b02eabb9ebb1a089ed20626a90ad7a69cee6bcd62c227692466054b19c38dd1f"}, + {file = "protobuf-4.21.8-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:4761201b93e024bb70ee3a6a6425d61f3152ca851f403ba946fb0cde88872661"}, + {file = "protobuf-4.21.8-cp37-cp37m-win32.whl", hash = "sha256:f2d55ff22ec300c4d954d3b0d1eeb185681ec8ad4fbecff8a5aee6a1cdd345ba"}, + {file = "protobuf-4.21.8-cp37-cp37m-win_amd64.whl", hash = "sha256:c5f94911dd8feb3cd3786fc90f7565c9aba7ce45d0f254afd625b9628f578c3f"}, + {file = "protobuf-4.21.8-cp38-cp38-win32.whl", hash = "sha256:b37b76efe84d539f16cba55ee0036a11ad91300333abd213849cbbbb284b878e"}, + {file = "protobuf-4.21.8-cp38-cp38-win_amd64.whl", hash = 
"sha256:2c92a7bfcf4ae76a8ac72e545e99a7407e96ffe52934d690eb29a8809ee44d7b"}, + {file = "protobuf-4.21.8-cp39-cp39-win32.whl", hash = "sha256:89d641be4b5061823fa0e463c50a2607a97833e9f8cfb36c2f91ef5ccfcc3861"}, + {file = "protobuf-4.21.8-cp39-cp39-win_amd64.whl", hash = "sha256:bc471cf70a0f53892fdd62f8cd4215f0af8b3f132eeee002c34302dff9edd9b6"}, + {file = "protobuf-4.21.8-py2.py3-none-any.whl", hash = "sha256:a55545ce9eec4030cf100fcb93e861c622d927ef94070c1a3c01922902464278"}, + {file = "protobuf-4.21.8-py3-none-any.whl", hash = "sha256:0f236ce5016becd989bf39bd20761593e6d8298eccd2d878eda33012645dc369"}, + {file = "protobuf-4.21.8.tar.gz", hash = "sha256:427426593b55ff106c84e4a88cac855175330cb6eb7e889e85aaa7b5652b686d"}, +] +pyasn1 = [ + {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"}, + {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"}, + {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"}, + {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"}, + {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, + {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"}, + {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"}, + {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"}, + {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"}, + {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"}, + {file = "pyasn1-0.4.8-py3.6.egg", hash = 
"sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"}, + {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"}, + {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] +pyasn1-modules = [ + {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, + {file = "pyasn1_modules-0.2.8-py2.4.egg", hash = "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199"}, + {file = "pyasn1_modules-0.2.8-py2.5.egg", hash = "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"}, + {file = "pyasn1_modules-0.2.8-py2.6.egg", hash = "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb"}, + {file = "pyasn1_modules-0.2.8-py2.7.egg", hash = "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8"}, + {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, + {file = "pyasn1_modules-0.2.8-py3.1.egg", hash = "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d"}, + {file = "pyasn1_modules-0.2.8-py3.2.egg", hash = "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45"}, + {file = "pyasn1_modules-0.2.8-py3.3.egg", hash = "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4"}, + {file = "pyasn1_modules-0.2.8-py3.4.egg", hash = "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811"}, + {file = "pyasn1_modules-0.2.8-py3.5.egg", hash = "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed"}, + {file = "pyasn1_modules-0.2.8-py3.6.egg", hash = "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0"}, + {file = "pyasn1_modules-0.2.8-py3.7.egg", hash = "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd"}, +] 
+pygsheets = [ + {file = "pygsheets-2.0.5-py2.py3-none-any.whl", hash = "sha256:85a4c871ac1d53013e042c13552b07f908b991c3d8c8770b3a68eb3452c8c218"}, + {file = "pygsheets-2.0.5.tar.gz", hash = "sha256:ea6ce75dabd1359e49fd36920ff0d25ff9428ccc3d5d2474bdba80fb8653ad80"}, +] +pymongo = [ + {file = "pymongo-4.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:68320e5326e2b1e49dcd901e6dcbe3009b8a0fd0da0c618579a2be7cf5f2d7be"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux1_i686.whl", hash = "sha256:3f41781c8310fe1ae3ed0b809e2d7be6ebba9f0954c08e1d18ac443916b82b29"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:372307185d8e17ea31d2f3ff6943e213a6c379ccf547f18b05a58a1620d6f92a"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux2014_i686.whl", hash = "sha256:1be15568e4b2be4c75bc54a542276c857628e09cbc283befcf4c45a0a22c1eec"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux2014_ppc64le.whl", hash = "sha256:cad31512e6956c95210fbd585d5b80df28425251260387164c6382894f0c6eca"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux2014_s390x.whl", hash = "sha256:b510843ea70e5bc9c096a93f683b28e8d43f1ad89da0126502d88b3d90f07ebe"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:253faefea46482ffa87c77fdd01cd95d430cc84aae8d7a78ba920ea6cebcf3c7"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a06c9ca15a2133478d1c775c4e7e5e782961b6254a3fc81ab5d0fb3cf9b8e358"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5231eb29e8174509250bc5fc609d6e8eceebfb209bf37bd6e014cbd7b6554344"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:178ffaa833d473b16fbd65c4a485af56484a50e2a201e8d0547f98cf5007f133"}, + {file = "pymongo-4.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f41c4e3b9e315655d6d1136d904ceda24fe5ea2d273ec6f9d66dbef06f3446"}, + {file = 
"pymongo-4.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:352bc034e112c9f6a408e2796e74bae900d3167a804224b2c24ea75b5d57e9f9"}, + {file = "pymongo-4.3.2-cp310-cp310-win32.whl", hash = "sha256:28ab644adc92c21a249570e2d677ebf4f2ef374630ddec98f19d2630dcb154c6"}, + {file = "pymongo-4.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:6049927b50c39e7dc51e75b5bb30c8501fbf1f08414b3447bcc9f9f967c116ed"}, + {file = "pymongo-4.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:77436db17ab2baec2356cf38db32d13c7cd11267c8137864c67391f2dfdcc5e4"}, + {file = "pymongo-4.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cefd851fdea191fc4db780157a28a11e0a80bccd34c454a73f252a287d28b2c7"}, + {file = "pymongo-4.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c22ad464688a807bec103734cbdf712489c74d439cdd346e6f12095070bfbf5"}, + {file = "pymongo-4.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db94b741dde2cc44ec038495d041c8f6dd4d510bb4e5d0be1b9f9aae4fbb28c6"}, + {file = "pymongo-4.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21e1cfa3e73cd253afcad32e2a46a277f52553635ccc0dd4d643f5824af88428"}, + {file = "pymongo-4.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5db3bddbbc2657aa76088e76d24a616aefc98883c48dc27f3c3829ddb2ca10d"}, + {file = "pymongo-4.3.2-cp311-cp311-win32.whl", hash = "sha256:006799ddba1f2e73ce27689f016791ab80e51876c52ae2265d8c76016baaa10e"}, + {file = "pymongo-4.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:0c8f061eabef3a6b3696f7f7be3eaed7928864ff84a2248429f9c7eb564343cc"}, + {file = "pymongo-4.3.2-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:80bdfc7039674c670e1afbf95849ce2075731785527eeac7e3850e862dec239b"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux1_i686.whl", hash = 
"sha256:8d81f6f5f6e66481aadd2fc087a937833312de23cd94b5ea1b225f35fafb0a00"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:02140c1a9f2107a16c074c9e558a556faafb0dc3c2e9332c6685c5506823ab9d"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b4b683a40cf07b6d16704ead92a7aee24208d3af83d55d31248cdac003f8591c"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:dab1d89f969046057be2b904a7bbf40df114f43aebfb3ccdceb054d9c40ec56d"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:0f48c2562a1d1426b6db7567511dc62817df43357041e1fd4ea5c68278bfa11b"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:ce14598b8fa93e51aed0f400e446fddd6b26297ba5965fd0c0585614b60b9fc0"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:af46f635513c7339419374f46f4f662cee7140bfb86de4377885a2c1de2278d4"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96483316a799923f13bb61170f05feab22e8bd8630bf8cdcd440c78f307039a"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:07f58d05d2289f93e16ddc93be6e0453fa67afd33c1b015f6bd3d9741c0963ff"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8817b17db2013354aa7f187d5825d65da0d7720b5ca697af37ff5efdf97e7f62"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36e7a74bdab9aa19f5ac94dfd74111d2164ccea752afbef0aa039d1266e7c404"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6db95d3e955aa5dbe42db691dd77cdddc0bc15f9883aa1def51f3ca40d49c1d6"}, + {file = "pymongo-4.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aee8fafea8bb669deb0dd4878d947f79b2ef298e60f06e1fe799598929b68be2"}, + {file = 
"pymongo-4.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cea32bd14d8c0725e22e5fcc607a81e3636650c689697c12423a34f9a125c7e2"}, + {file = "pymongo-4.3.2-cp37-cp37m-win32.whl", hash = "sha256:3966dcba4b80dbc0eb4dd08d6f7127e3b1701cd829b6c13507a956c878b78546"}, + {file = "pymongo-4.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6461d29a967e1980ba7798e4da8178dbe4245fe4a66ebb3aa07339c9da383c3a"}, + {file = "pymongo-4.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b36da8aeb95cc1abea7b80e578fb6bcdbe395638d16b1b0068bc121e2111a00f"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:315fe5f628e9aee67cc4c17b91ddf08c5c0917b764f433a5acf9aed33164a8f0"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:07e05784578bf7f8ecdfc6d0fd1e684e6259e9b5fdb5439a58c4f0df950fae29"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:f84b8428a41d7d7f2931762c27b09ffa8b3bc51e3b5dab40ab2b1d008091247e"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:39308580bcdbc368a2664c48761226c06b1d3368cc3ab3492d3cca88dc2e5e27"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:7f907daec92208d748db4ea04568aa33e9254e0c27e4e40ac287e1b1ca8b12b5"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:cb47ba9c19da8fb4174f9d7bbbdb1796ad288c61dda35c96fb45d69e61d3a5cb"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:0d7ad2112a705e992ca0cca98ccbb874276c495f8d9df627438c2ee94f810a3d"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4082d1b660e70d9df71da00050f7adb902b73a2287216e69ada124bd2f89636"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26dd79e60f883b6467b91c8af0be484147365b18cebf9248f8e72c035aecb693"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:715ad027daff84e213ab74fa3ec98cad8dabb669653a71daa0dd6f80a1c32dd0"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ceded83530f5507dadd873f8d004b56f996de44d9c3f56b7f26c22ca823f12ee"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f968621d019ed165f1cb5b037875ce3425ea7704407234895c7c52ad32190da"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4f7763c9e37e6d59406ce2defc25266980b24a86708ec6db753b02459db45715"}, + {file = "pymongo-4.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f3861081540e1f06d1e5d131d1419b9fc507834b6865407e0f56735b4082566c"}, + {file = "pymongo-4.3.2-cp38-cp38-win32.whl", hash = "sha256:7424b7c59b16e7889a720a5b2e2dda518753c6fec6c6582ab2fcedf97df3df75"}, + {file = "pymongo-4.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:64b010681019c0b312f342e3aae1f3091a7dc7ff4b7a3dec72fc0e7238be9477"}, + {file = "pymongo-4.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cc7b269af274ac0d5d9a5c8d035b03ccc34438baa01705bf8ec7cc6a31093ace"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:68213f4c1531b95dcfef40f79dd95e94484f69ec5949b7f42f82ad2bee135f7f"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:11630f5b3287375c85f5b7a788d3a7241671af24fda2b49a3396bc53cbf1c0c6"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:35e9eec45a212306143367b0702c2aff75c375290015af00fa8b653641c20b34"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:94639935caf13af551429bd13e4cb20e7c110a57d07f0c6a84a9bf3c2c9000ad"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:7ce5d43c011e03cd1a42a4dcc0d5c8772f18533cdfe672a63607942d62581df4"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux2014_s390x.whl", hash = 
"sha256:98fd65c2aee7a55615dda1a1b0340ae8d756151983cb5040ea59a730083221e7"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:c8e82d6cc2f1cf5017485f55d67375bacf73d95c40903759e46024a987bab86f"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab7f49c5ca3db7ae94743b0da1b21c5e7402a561a0614c1b0fba718aad591611"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:404bc7f7190e8975f41f0c7498e303e9cb291f6384e1889ac4333448652a83d6"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c821c897498e3e3c3254f7a90195f71473361f502201fd396281869d8108857"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6498ae9a76ad64617703373a43e3cd8454271bca0d7d395b393b4f31aa68f734"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2a8b2b7d9196d46e5181f88632eeca5bf79a69ca2e9911229c58f66aebfbeb"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f423e066de040f4f93dcac0e6ceec37ffc25cc591a609ecc3ab20adfdbb787ae"}, + {file = "pymongo-4.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04597a5d877a984b5e3059e942b02d68f8af9bb4328592abca27e82015560112"}, + {file = "pymongo-4.3.2-cp39-cp39-win32.whl", hash = "sha256:53dd2c034fb92c019e5e581cd361ed3fa9833abb56cc76725d56dcba169746fe"}, + {file = "pymongo-4.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:d7bdfac2f3c87d0971691f2a091427f55bb6b94b23d74213ed2de87d8facba85"}, + {file = "pymongo-4.3.2.tar.gz", hash = "sha256:95913659d6c5fc714e662533d014836c988cc1561684f07b6a0a8343651afa66"}, +] +pyparsing = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = 
"sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pytest = [ + {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"}, + {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +requests = [ + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, +] +requests-oauthlib = [ + {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, + {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, +] +rsa = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] +s3transfer = [ + {file = "s3transfer-0.6.0-py3-none-any.whl", hash = "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd"}, + {file = "s3transfer-0.6.0.tar.gz", hash = "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +tomli = [ + {file = 
"tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +uritemplate = [ + {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] +urllib3 = [ + {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, + {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, +] diff --git a/scripts/ingestion/pyproject.toml b/scripts/ingestion/pyproject.toml new file mode 100644 index 0000000..ead9496 --- /dev/null +++ b/scripts/ingestion/pyproject.toml @@ -0,0 +1,18 @@ +[tool.poetry] +name = "gh_ebola_ingestion" +version = "0.1.0" +description = "Transfers line list data from Google Sheets to storage" +authors = ["Global.health team "] + +[tool.poetry.dependencies] +python = "^3.10" +boto3 = "^1.25.1" +pymongo = "^4.3.2" +pygsheets = "^2.0.5" + +[tool.poetry.dev-dependencies] +pytest = "^7.2.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/scripts/ingestion/run.py b/scripts/ingestion/run.py new file mode 100644 index 0000000..3507e36 --- /dev/null +++ b/scripts/ingestion/run.py @@ -0,0 +1,106 @@ +from datetime import datetime +import json +import logging +import os +import sys +from urllib.parse import urlparse + +import boto3 +from pymongo import MongoClient +from pymongo.errors import PyMongoError +import pygsheets + + +DOCUMENT_ID = os.environ.get("DOCUMENT_ID") + +S3 = boto3.resource("s3") +LOCALSTACK_URL = os.environ.get("LOCALSTACK_URL") + +if LOCALSTACK_URL: + S3 = boto3.resource("s3", 
endpoint_url=LOCALSTACK_URL) + +S3_BUCKET = os.environ.get("S3_BUCKET") +S3_FOLDER = os.environ.get("S3_FOLDER") + +DB_CONNECTION = os.environ.get("DB_CONNECTION") +DATABASE_NAME = os.environ.get("DATABASE_NAME") +GH_COLLECTION = os.environ.get("GH_COLLECTION") + +MINIMUM_DATA = ["ID", "Date_confirmation", "Curator_initials", "Country", "Status"] + +PRIVATE_FIELDS = ["Curator_initials", "Notes", "Pathogen_status"] + +TODAY = datetime.today() + + +def setup_logger(): + h = logging.StreamHandler(sys.stdout) + rootLogger = logging.getLogger() + rootLogger.addHandler(h) + rootLogger.setLevel(logging.DEBUG) + + +def get_data(): + logging.info("Getting data from Google Sheets") + client = pygsheets.authorize(service_account_env_var="GOOGLE_CREDENTIALS") + spreadsheet = client.open_by_key(DOCUMENT_ID) + + return spreadsheet[0].get_all_records() + + +def clean_data(data): + logging.info("Cleaning data") + for c in data: + for field in PRIVATE_FIELDS: + if field in c: + c.pop(field) + return data + + +def format_data(data): + logging.info("Formatting data") + csv_data = "" + column_names = data[0].keys() + for name in column_names: + csv_data += f"{name}," + csv_data += "\n" + for row in data: + for val in row.values(): + csv_data += f"{str(val).replace(',', ';')}," + csv_data += "\n" + return csv_data + + +def store_data(csv_data): + logging.info("Uploading data to S3") + try: + S3.Object(S3_BUCKET, f"{S3_FOLDER}/{TODAY}.csv").put(Body=csv_data) + S3.Object(S3_BUCKET, f"latest.csv").put(Body=csv_data) + except Exception as exc: + logging.exception(f"An exception occurred while trying to upload files") + raise + + +def data_to_db(data): + logging.info("Adding data to the database") + try: + client = MongoClient(DB_CONNECTION) + database = client[DATABASE_NAME] + for entry in data: + find = {"ID": entry["ID"]} + update = {"$set": entry} + database[GH_COLLECTION].update_one(find, update, upsert=True) + except PyMongoError: + logging.exception("An error occurred while trying 
to insert data") + raise + + +if __name__ == "__main__": + setup_logger() + logging.info("Starting Ebola data ingestion") + data = get_data() + data = clean_data(data) + csv_data = format_data(data) + store_data(csv_data) + data_to_db(data) + logging.info("Work complete") diff --git a/scripts/ingestion/run.sh b/scripts/ingestion/run.sh new file mode 100755 index 0000000..9e0e23c --- /dev/null +++ b/scripts/ingestion/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -eo pipefail + +if [[ -v LOCALSTACK_URL ]]; then + echo "Localstack configured, running setup script" + poetry run python3 setup.py +fi + +poetry run python3 run.py diff --git a/scripts/ingestion/run_stack.sh b/scripts/ingestion/run_stack.sh new file mode 100755 index 0000000..e198ec6 --- /dev/null +++ b/scripts/ingestion/run_stack.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker compose -f docker-compose.yml up --build --force-recreate --renew-anon-volumes diff --git a/scripts/ingestion/setup.py b/scripts/ingestion/setup.py new file mode 100644 index 0000000..977db90 --- /dev/null +++ b/scripts/ingestion/setup.py @@ -0,0 +1,76 @@ +import logging +import os +from time import sleep + +import boto3 +import requests +from pymongo import MongoClient +from pymongo.errors import PyMongoError + + +LOCALSTACK_URL = os.environ.get("AWS_ENDPOINT", "http://localstack:4566") +S3_BUCKET = os.environ.get("S3_BUCKET", "test") + +DB_CONNECTION = os.environ.get("DB_CONNECTION", "test") +DATABASE_NAME = os.environ.get("DB_NAME", "monkeypox") + +GH_COLLECTION = os.environ.get("GH_COLLECTION", "gh") + +MAX_ATTEMPTS = 42 +WAIT_TIME = 5 + + +def wait_for_localstack(): + logging.info("Waiting for localstack") + healthcheck_url = "".join([LOCALSTACK_URL, "/health"]) + counter = 0 + while counter < MAX_ATTEMPTS: + try: + response = requests.get(healthcheck_url) + s3_status = response.json().get("services", {}).get("s3") + if s3_status in ["available", "ready"]: + return + except requests.exceptions.ConnectionError: + pass + counter += 1 + 
sleep(WAIT_TIME) + raise Exception("Localstack not available") + + +def wait_for_database(): + logging.info("Waiting for database") + counter = 0 + while counter < MAX_ATTEMPTS: + try: + client = MongoClient(DB_CONNECTION) + logging.info(f"Connected with access to: {client.list_database_names()}") + return + except PyMongoError: + logging.info(f"Database service not ready yet, retrying in {WAIT_TIME} seconds") + pass + counter += 1 + sleep(WAIT_TIME) + raise Exception("Database service not available") + + +def create_bucket(bucket_name:str) -> None: + logging.info(f"Creating S3 bucket {bucket_name}") + s3_client = boto3.client("s3", endpoint_url=LOCALSTACK_URL) + s3_client.create_bucket(Bucket=bucket_name) + + +def create_database(): + logging.info(f"Creating {DATABASE_NAME} database, or confirming it exists") + client = MongoClient(DB_CONNECTION) + database = client[DATABASE_NAME] + logging.info("Creating collections") + _ = database[GH_COLLECTION] + + +if __name__ == "__main__": + logging.info("Starting local/testing setup script") + wait_for_localstack() + wait_for_database() + create_database() + create_bucket(S3_BUCKET) + logging.info("Done") \ No newline at end of file diff --git a/scripts/ingestion/test.sh b/scripts/ingestion/test.sh new file mode 100755 index 0000000..33a1758 --- /dev/null +++ b/scripts/ingestion/test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -eo pipefail + +poetry run python3 setup.py +poetry run python3 -m pytest -rs -vv . 
diff --git a/scripts/ingestion/test_run.py b/scripts/ingestion/test_run.py new file mode 100644 index 0000000..ace1da8 --- /dev/null +++ b/scripts/ingestion/test_run.py @@ -0,0 +1,64 @@ +import os + +import boto3 +from pymongo import MongoClient +import pytest + +from run import (DB_CONNECTION, DATABASE_NAME, GH_COLLECTION, S3_BUCKET, LOCALSTACK_URL, + clean_data, format_data, data_to_db, store_data) + +CASE = { + "ID": 1, + "Date_confirmation": "2021-05-12", + "Curator_initials": "ZZ", + "Notes": "example note", + "Country": "Uganda", + "Status": "confirmed" +} + +CLEAN_CASE = CASE.copy() +del CLEAN_CASE["Curator_initials"] +del CLEAN_CASE["Notes"] +CLEAN_DATA = [CLEAN_CASE] + +CSV_DATA = """ +ID,Date_confirmation,Country,Status, +1,2021-05-12,Uganda,confirmed, +""" + + +def get_contents(file_name: str) -> str: + s3 = boto3.resource("s3", endpoint_url=LOCALSTACK_URL) + obj = s3.Object(S3_BUCKET, file_name) + return obj.get()["Body"].read().decode("utf-8") + + +def get_db_records(collection: str) -> list[dict]: + db = MongoClient(DB_CONNECTION)[DATABASE_NAME][collection] + cursor = db.find({}) + return [record for record in cursor] + + +def test_clean_data(): + assert clean_data([CASE]) == CLEAN_DATA + + +def test_format_data(): + assert format_data(CLEAN_DATA).strip("\n") == CSV_DATA.strip("\n") + + +@pytest.mark.skipif(not os.environ.get("DOCKERIZED", False), + reason="Running e2e tests outside of mock environment disabled") +def test_store_data(): + store_data(CSV_DATA) + assert get_contents("latest.csv") == CSV_DATA + + +@pytest.mark.skipif(not os.environ.get("DOCKERIZED", False), + reason="Running e2e tests outside of mock environment disabled") +def test_data_to_db(): + data_to_db(CLEAN_DATA) + db_records = get_db_records(GH_COLLECTION) + del db_records[0]["_id"] + assert db_records == CLEAN_DATA + diff --git a/scripts/ingestion/test_stack.sh b/scripts/ingestion/test_stack.sh new file mode 100755 index 0000000..7c06704 --- /dev/null +++ 
b/scripts/ingestion/test_stack.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker compose -f docker-compose-test.yml up --build --force-recreate --remove-orphans --renew-anon-volumes --exit-code-from test