diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..944f1a0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git* +*.ows +build +.hadolint +.travis.yml +dist +examples +node_modules diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..7419d9d --- /dev/null +++ b/.flake8 @@ -0,0 +1,39 @@ + +[flake8] +# Explicitly ignore rules to maintain better compatibility with black. +# +# F403: import * +# F811: redefinition of unused `name` from line `N` +# F821: undefined name +# F841: local variable assigned but never used +# E402: module level import not at top of file +# I100: your import statements are in the wrong order. +# I101: the names in your from import are in the wrong order. +# D400: first line should end with a period. +# E203: colons should not have any space before them. +# E231: missing whitespace after ',' +# E501: line lengths are recommended to be no greater than 79 characters. +# E503: there is no need for backslashes between brackets. 
+# E731: do not assign a lambda expression, use a def +# W503: line break before binary operator +# W293: blank line contains whitespace +select = E,F,W,C +ignore = F403, F811, F821, F841, E231, E402, I100, I101, D400, E501, E501, E503, E731, W293, W503 +extend-ignore = E203 +max-complexity = 15 +# https://docs.djangoproject.com/en/dev/internals/contributing/writing-code/coding-style/ +max-line-length = 119 +builtins = c, get_config +exclude = + .ansible, + .cache, + __pycache__, + .github, + .ipynb_checkpoints, + .pytest_cache, + .travis, + .vscode, + ansible, + docs, + node_modules, + venv diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml new file mode 100644 index 0000000..ffb4144 --- /dev/null +++ b/.github/workflows/docker.yaml @@ -0,0 +1,46 @@ +name: Test and Push + +on: + pull_request: + paths-ignore: + - "*.md" + push: + branches: + - main + paths-ignore: + - "*.md" + +jobs: + build-images: + name: Build Docker Images + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Clone Main Repo + uses: actions/checkout@v2 + with: + path: main + - name: Set Up Python + uses: actions/setup-python@v2 + with: + python-version: 3.x + - name: Install Dev Dependencies + run: | + python -m pip install --upgrade pip + make -C main dev-env + - name: Build Docker Images + run: make -C main build-all + env: + # Full logs for CI build + BUILDKIT_PROGRESS: plain + - name: Login to Docker Hub + if: github.ref == 'refs/heads/main' + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Push Images to DockerHub + if: github.ref == 'refs/heads/main' + run: make -C main push-all + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..008e0e9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,141 @@ +# ide's +.vscode + +# mac +.DS_Store + +# npm +node_modules/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# 
random test outputs +*.ows + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/.hadolint.yml b/.hadolint.yml new file mode 100644 index 0000000..64ab6ec --- /dev/null +++ b/.hadolint.yml @@ -0,0 +1,11 @@ +--- +ignored: + - DL3000 + - DL3004 + - DL3006 + - DL3007 + - DL3008 + - DL3015 + - DL3016 + - DL4006 + - SC2035 \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f86cbca --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 IllumiDesk + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3a0b59c --- /dev/null +++ b/Makefile @@ -0,0 +1,106 @@ +# Most of the code below is sourced from: +# https://raw.githubusercontent.com/jupyter/docker-stacks/master/Makefile +.PHONY: build + +# Use bash for inline if-statements in target +SHELL:=bash +TAG:=julia-1.6.1 +OWNER:=illumidesk +VENV_NAME?=venv +VENV_BIN=$(shell pwd)/${VENV_NAME}/bin +VENV_ACTIVATE=. ${VENV_BIN}/activate + +PYTHON=${VENV_BIN}/python3 + +# Need to list the images in build dependency order +ALL_STACKS:=umich-notebook + +ALL_IMAGES:=$(ALL_STACKS) + +# Linter +HADOLINT="${HOME}/hadolint" + +help: +# http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html +# http://github.com/jupyter/docker-stacks + @echo "illumidesk/illumidesk-stacks" + @echo "=====================" + @echo "Replace % with a stack directory name (e.g., make build/python-notebook)" + @echo + @grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +build/%: DARGS?= +build/%: OWNER?= +build/%: TAG?= +build/%: ## build the latest image for a stack + @docker build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):$(TAG) ./$(notdir $@) + @echo -n "Built image size: " + @docker images $(OWNER)/$(notdir $@):$(TAG) --format "{{.Size}}" + +build-all: $(foreach I,$(ALL_IMAGES), build/$(I)) ## build all stacks + +push/%: DARGS?= +push/%: TAG?= +push/%: ## push stack images + @docker push $(DARGS) $(OWNER)/$(notdir $@):$(TAG) + +push-all: $(foreach I,$(ALL_IMAGES), push/$(I)) ## push all stacks + +cont-clean-all: cont-stop-all cont-rm-all ## clean all containers (stop + rm) +cont-stop-all: ## stop all containers + @echo "Stopping all containers ..." + -docker stop -t0 $(shell docker ps -a -q) 2> /dev/null +cont-rm-all: ## remove all containers + @echo "Removing all containers ..." 
+ -docker rm --force $(shell docker ps -a -q) 2> /dev/null + +dev/%: ARGS?= +dev/%: DARGS?= +dev/%: PORT?=8888 +dev/%: ## run one of the containers (stacks) on port 8888 + @docker run -it --rm -p $(PORT):8888 $(DARGS) $(OWNER)/$(notdir $@) $(ARGS) + +dev-env: ## install libraries required to build docs and run tests during CI. + @pip install -r dev-requirements.txt + +img-clean: img-rm-dang img-rm ## clean dangling and jupyter images +img-list: ## list jupyter images + @echo "Listing $(OWNER) images ..." + docker images "$(OWNER)/*" +img-rm: ## remove jupyter images + @echo "Removing $(OWNER) images ..." + -docker rmi --force $(shell docker images --quiet "$(OWNER)/*") 2> /dev/null +img-rm-dang: ## remove dangling images (tagged None) + @echo "Removing dangling images ..." + -docker rmi --force $(shell docker images -f "dangling=true" -q) 2> /dev/null + +lint/%: ARGS?=--config .hadolint.yml +lint/%: ## lint the dockerfile(s) for a stack + @echo "Linting Dockerfiles with Hadolint in $(notdir $@)..." + @git ls-files --exclude='Dockerfile*' --ignored $(notdir $@) | grep -v ppc64 | xargs -L 1 $(HADOLINT) $(ARGS) + @echo "Linting with Hadolint done!" + @echo "Linting tests with flake8 in in $(notdir $@)..." + ${VENV_BIN}/flake8 $(notdir $@) + @echo "Linting with flake8 done!" + @echo "Applying black updates to test files in $(notdir $@)..." + ${VENV_BIN}/black $(notdir $@) + @echo "Source formatting with black done!" + +lint-all: $(foreach I,$(ALL_IMAGES),lint/$(I) ) ## lint all stacks + +lint-build-all: $(foreach I,$(ALL_IMAGES),lint/$(I) build/$(I) ) ## lint, build and test all stacks + +lint-install: ## install hadolint + @echo "Installing hadolint at $(HADOLINT) ..." + @curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-$(shell uname -s)-$(shell uname -m)" + @chmod 700 $(HADOLINT) + @echo "Hadolint nstallation done!" 
+ @$(HADOLINT) --version + +test: lint-build-all ## test images as running containers + @echo "Testing images as running containers ..." + @echo "Testing done!" + +venv: lint-install ## install linter and create virtual environment + test -d $(VENV_NAME) || virtualenv -p python3 $(VENV_NAME) + ${PYTHON} -m pip install -r dev-requirements.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..558cde7 --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +[![Test Docker Image Status](https://github.com/illumidesk/illumidesk-stacks/workflows/Test%20and%20Push/badge.svg)](https://github.com/illumidesk/illumidesk-stacks/actions?query=branch%3Amain+workflow%3A%22Test+and+Push%22) + + +# IllumiDesk Docker Stacks + +This repo is used to manage IllumiDesk's standard docker images for the IllumiDesk learning environment to power Jupyter Server backends. + +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) + +## Quickstart + +1. Install dependencies + +```bash +make venv +``` + +2. Build images + +```bash +make build-all +``` + +3. Run: + +Running the image standalone is helpful for testing: + +```bash +docker run -p 8888:8888 illumidesk/python-notebook:latest +``` + +Then, navigate to `http://127.0.0.1:8888` to access your Jupyter Notebook server. + +> Refer to [docker's documentation](https://docs.docker.com/engine/reference/run/) for additional `docker run ...` options. + +## Customize the Image + +1. Add additional Python packages by editing the `./python-notebook/requirements.txt` file. + +2. Rebuild end-user and grader images with `make build-all`. + +3. Push images to AWS ECR + +- [You must first authenticate](https://docs.aws.amazon.com/AmazonECR/latest/userguide/registry_auth.html) to push to AWS ECR repos using `docker` commands. +- Run `docker push <repository-uri>` where `<repository-uri>` represents the full path to your AWS ECR repository. + +4. 
(Optional) Push images to DockerHub + +This step requires creating an Organization account in DockerHub or another Docker-compatible image registry. The `docker push ...` command will push the image to the DockerHub registry by default. Please refer to the official Docker documentation if you would like to push to another registry. + +For example, assuming the DockerHub organization is `illumidesk`, the source files are in the `python-notebook` folder, and the tag is `latest`, then the full namespace for the image would be `illumidesk/python-notebook:latest`. Assuming the image has been built, push the image to DockerHub or any other docker registry with the `docker push <organization>/<image>:<tag>` command: + +```bash +docker login +docker push illumidesk/python-notebook:latest +``` + +## Development and Testing + +1. Create your virtual environment and install dev-requirements: + +```bash +make venv +``` + +2. Check Dockerfiles with linter: + +```bash +make lint-all +``` + +## References + +These images are based on the `jupyter/docker-stacks` images. [Refer to their documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/) for the full set of configuration and testing options. 
+ +## Attributions + +- [JupyterHub repo2docker](https://repo2docker.readthedocs.io/en/latest/) +- [jupyter/docker-stacks images](https://github.com/jupyter/docker-stacks) + +## License + +MIT diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..92810dd --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,7 @@ +black +docker +flake8 +pytest +requests +tabulate +transifex-client diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d769157 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[tool.black] +skip-string-normalization = true +line-length = 119 +target-version = ['py38'] +include = '\.pyi?$' +exclude = ''' + +( + /( + \.eggs + | \.git + | \.ipynb_checkpoints + | \.mypy_cache + | \.pytest_cache + | \.travis + | \.vscode + | \.venv + | _build + | ansible + | build + | dist + ) +) +''' \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..106434f --- /dev/null +++ b/pytest.ini @@ -0,0 +1,8 @@ +[pytest] +addopts = -ra +log_cli = 1 +log_cli_level = INFO +log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) +log_cli_date_format=%Y-%m-%d %H:%M:%S +markers = + info: marks tests as info (deselect with '-m "not info"') \ No newline at end of file diff --git a/python-notebook/Dockerfile b/python-notebook/Dockerfile new file mode 100644 index 0000000..a260481 --- /dev/null +++ b/python-notebook/Dockerfile @@ -0,0 +1,22 @@ +# Based mostly off of: +# https://github.com/jupyter/docker-stacks/blob/main/images/julia-notebook +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/datascience-notebook +FROM ${BASE_CONTAINER} + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER ${NB_UID} + +# Copy requirements.txt and install the Python packages it lists +COPY --chown=${NB_UID}:${NB_GID} requirements.txt 
"${HOME}/requirements.txt" +WORKDIR "${HOME}" +RUN pip install -r requirements.txt + + +WORKDIR "${HOME}" + +CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip=0.0.0.0", "--KernelGatewayApp.port=8888"] diff --git a/python-notebook/requirements.txt b/python-notebook/requirements.txt new file mode 100755 index 0000000..a051485 --- /dev/null +++ b/python-notebook/requirements.txt @@ -0,0 +1,18 @@ +psycopg2-binary +gpt4all +jupyter_kernel_gateway +langchain +langchain.tools +langchain_openai +langchain_text_splitters +llmlingua +networkx +nltk +openai +pdfplumber +pymupdf +pypdf +pdf2image +poppler-utils +reportlab +requests \ No newline at end of file