Skip to content

Commit

Permalink
Merge pull request #232 from umccr/docs/docker-minio-example
Browse files Browse the repository at this point in the history
docs: docker minio example
  • Loading branch information
mmalenic authored Mar 21, 2024
2 parents 1bf0087 + 233f1de commit 1446dbb
Show file tree
Hide file tree
Showing 16 changed files with 259 additions and 91 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,10 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Docker GitHub release
id: docker_build
uses: docker/build-push-action@v5
with:
context: .
file: deploy/Dockerfile
push: true
tags: |
ghcr.io/umccr/htsget-rs:dev-latest
ghcr.io/umccr/htsget-rs:dev-${{ github.run_number }}
ghcr.io/umccr/htsget-rs:latest
12 changes: 3 additions & 9 deletions deploy/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
FROM rust:1.74.0 AS builder
FROM rust:1.76-slim AS builder

WORKDIR /build

RUN cargo install cargo-strip

COPY . .

RUN cargo build --features url-storage --release && \
RUN cargo build --all-features --release && \
cargo strip

FROM debian:stable-slim
FROM gcr.io/distroless/cc-debian12

RUN apt update && apt install -y libc6-dev && rm -rf /var/lib/apt/lists/*

COPY --from=builder /etc/ssl/certs/ /etc/ssl/certs/
COPY --from=builder /build/target/release/htsget-actix /usr/local/bin/htsget-actix

# Bind the ticket and data servers to all interfaces on the ports exposed below.
# Uses the `ENV key=value` form; the space-separated `ENV key value` form is legacy syntax.
ENV HTSGET_TICKET_SERVER_ADDR=0.0.0.0:8080
ENV HTSGET_DATA_SERVER_ADDR=0.0.0.0:8081

EXPOSE 8080
EXPOSE 8081

CMD [ "htsget-actix" ]
15 changes: 15 additions & 0 deletions deploy/Dockerfile.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Exclude everything from the build context by default.
*

# Re-include only the htsget-* directories and the Cargo manifest and lock file.
!/htsget-actix
!/htsget-config
!/htsget-http
!/htsget-lambda
!/htsget-search
!/htsget-test
!/Cargo.toml
!/Cargo.lock

# Within the re-included paths, exclude benches, examples, licenses and Markdown files.
# These must come after the `!` rules above, because the last matching pattern wins.
**/benches
**/examples
**/LICENSE
**/*.md
31 changes: 5 additions & 26 deletions deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,33 +137,12 @@ Examples of different Lambda events are located in the [`data/events`][data-even

## Docker

There are multiple options to use docker containers with htsget-rs:

### Local

```
$ docker build . -f deploy/Dockerfile -t htsget-rs-actix
$ docker run -p 8080:8080 -p 8081:8081 htsget-rs-actix
2023-10-25T01:01:38.412471Z INFO bind_addr{addr=0.0.0.0:8081 cors=CorsConfig { allow_credentials: false, allow_origins: List([HeaderValue("http://localhost:8080")]), allow_headers: Tagged(All), allow_methods: Tagged(All), max_age: 86400, expose_headers: List([]) }}: htsget_search::storage::data_server: data server address bound to address=0.0.0.0:8081
2023-10-25T01:01:38.412710Z INFO run_server: htsget_actix: using non-TLS ticket server
2023-10-25T01:01:38.412805Z INFO run_server: htsget_actix: htsget query server addresses bound addresses=[0.0.0.0:8080]
2023-10-25T01:01:38.412837Z INFO run_server: actix_server::builder: starting 8 workers
2023-10-25T01:01:38.412892Z INFO actix_server::server: Actix runtime found; starting in Actix runtime
```

### Local with LocalStack (local AWS)

```
$ cd deploy
$ docker compose up --wait -d
$ npx cdklocal bootstrap
$ npx cdklocal deploy
```

### Local with MinIO (S3) backend

TBD, fetch instructions from [NBIS Sweden usecase, test and document them here properly](https://github.com/NBISweden/htsget-rs/tree/docker-testing/deploy).
There are example deployments using Docker under the [examples] directory. These include a [`LocalStorage`][local] deployment
and a [MinIO][minio] deployment.

[local]: examples/local_storage/README.md
[examples]: examples
[minio]: examples/minio/README.md
[htsget-lambda-bin]: bin/htsget-lambda.ts
[htsget-lambda-stack]: lib/htsget-lambda-stack.ts
[htsget-settings]: bin/settings.ts
Expand Down
7 changes: 7 additions & 0 deletions deploy/config/dev_umccr.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,34 @@ environment = "dev"
[[resolvers]]
regex = '^(umccr-10c-data-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-10f-data-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-10g-data-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-agha-test-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-research-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-primary-data-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-validation-prod)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'
3 changes: 3 additions & 0 deletions deploy/config/prod_umccr.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ environment = "prod"
[[resolvers]]
regex = '^(umccr-research-dev)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-validation-prod)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'

[[resolvers]]
regex = '^(umccr-primary-data-prod)/(?P<key>.*)$'
substitution_string = '$key'
storage = 'S3'
26 changes: 13 additions & 13 deletions deploy/config/public_umccr.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
data_server_enabled = false

name = "umccr-htsget-rs"
version = "0.1"
organization_name = "UMCCR"
organization_url = "https://umccr.org/"
contact_url = "https://umccr.org/"
documentation_url = "https://github.com/umccr/htsget-rs"
environment = "public"
name = 'umccr-htsget-rs'
version = '0.1'
organization_name = 'UMCCR'
organization_url = 'https://umccr.org/'
contact_url = 'https://umccr.org/'
documentation_url = 'https://github.com/umccr/htsget-rs'
environment = 'public'

[[resolvers]]
regex = "^(org.umccr.demo.sbeacon-data)/CINECA_UK1/(?P<key>.*)$"
substitution_string = "CINECA_UK1/$key"
storage = "S3"
regex = '^(org.umccr.demo.sbeacon-data)/CINECA_UK1/(?P<key>.*)$'
substitution_string = 'CINECA_UK1/$key'
storage = 'S3'

[[resolvers]]
regex = "^(org.umccr.demo.htsget-rs-data)/(?P<type>bam|cram|vcf|bcf|crypt4gh|mixed)/(?P<key>.*)$"
substitution_string = "$type/$key"
storage = "S3"
regex = '^(org.umccr.demo.htsget-rs-data)/(?P<type>bam|cram|vcf|bcf|crypt4gh|mixed)/(?P<key>.*)$'
substitution_string = '$type/$key'
storage = 'S3'
8 changes: 0 additions & 8 deletions deploy/docker-compose.yml

This file was deleted.

44 changes: 44 additions & 0 deletions deploy/examples/local_storage/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# LocalStorage deployment

A simple [`LocalStorage`][local] deployment using default settings is available under the [`compose.yml`][compose] file in this directory.

To run, use:

```
docker compose up
```

This launches a `LocalStorage` htsget-actix server serving data from the [`data`][data] directory.

The htsget-rs server can then be queried:

```sh
curl http://127.0.0.1:8080/reads/data/bam/htsnexus_test_NA12878
```

Which outputs:
```json
{
"htsget": {
"format": "BAM",
"urls": [
{
"url": "http://0.0.0.0:8081/data/bam/htsnexus_test_NA12878.bam",
"headers": {
"Range": "bytes=0-2596770"
}
},
{
"url": "data:;base64,H4sIBAAAAAAA/wYAQkMCABsAAwAAAAAAAAAAAA=="
}
]
}
}
```

The volume in the [`compose.yml`][compose] can be changed to any directory to serve data from it using
default settings. Files can then be queried with `curl http://127.0.0.1:8080/reads/data/<id>`, noting the extra `data` prefix.

[local]: ../../../htsget-config/README.md#resolvers
[compose]: compose.yml
[data]: ../../../data
11 changes: 11 additions & 0 deletions deploy/examples/local_storage/compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Minimal htsget-rs deployment that serves files from a mounted local directory.
version: '3'

services:
  htsget-rs:
    image: ghcr.io/umccr/htsget-rs:latest
    # Publish the two ports that the image exposes (8080 and 8081).
    ports:
      - '8080:8080'
      - '8081:8081'
    volumes:
      # Mounted at /data inside the container; point the host side at any
      # directory to serve its files using default settings.
      - ./../../../data:/data
71 changes: 71 additions & 0 deletions deploy/examples/minio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# MinIO deployment

[MinIO][minio] can be used with htsget-rs by configuring the [storage type][storage] as `S3` and setting the `endpoint` to the MinIO server.
There are a few specific configuration options that need to be considered to use MinIO with htsget-rs, and those include:

* The standard [AWS environment variables][env-variables] for connecting to AWS services must be set, and configured to match those
used by MinIO.
* This means that htsget-rs expects a region to be set using `AWS_REGION` (or `AWS_DEFAULT_REGION`), which must match the region used by MinIO (by default `us-east-1`).
* It also means that the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` must be set to match the credentials used by MinIO.
* If using virtual-hosted style [addressing][virtual-addressing] instead of path style [addressing][path-addressing], `MINIO_DOMAIN` must be
set on the MinIO server and DNS resolution must allow accessing the MinIO server using `bucket.<MINIO_DOMAIN>`.
* Path style addressing can be used instead by setting `path_style = true` under the htsget-rs resolvers storage type.

The caveats around the addressing style occur because there are two different addressing styles for S3 buckets, path style, e.g.
`http://minio:9000/bucket`, and virtual-hosted style, e.g. `http://bucket.minio:9000`. AWS has declared path style addressing
as [deprecated][path-style-deprecated], so this example sets up virtual-hosted style addressing as the default.

## Deployment using Docker

The above configuration can be applied using docker-compose to set the relevant environment variables. Additionally, if using
docker compose and virtual-hosted style addressing, a network alias which allows accessing the MinIO service under `bucket.<MINIO_DOMAIN>`
must be present.

An example [`compose.yml`][compose] is available which shows htsget-rs configured to use MinIO, serving data from the [data] directory.

To try it out, first start the services from this directory:

```sh
docker compose up
```

Then:

```sh
curl http://127.0.0.1:8080/reads/bam/htsnexus_test_NA12878
```

Outputs:
```json
{
"htsget": {
"format": "BAM",
"urls": [
{
"url": "http://data.minio:9000/bam/htsnexus_test_NA12878.bam?x-id=GetObject&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=user%2F20240320%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240320T014007Z&X-Amz-Expires=1000&X-Amz-SignedHeaders=host%3Brange&X-Amz-Signature=33a75bd6363ccbfd5ce8edf7e102a5edff8ca7cee17e3c654db01a880e98072d",
"headers": {
"Range": "bytes=0-2596770"
}
},
{
"url": "data:;base64,H4sIBAAAAAAA/wYAQkMCABsAAwAAAAAAAAAAAA=="
}
]
}
}
```

The url tickets can then be fetched within the compose network context:

```sh
docker exec -it minio curl -H "Range: bytes=0-2596770" "http://data.minio:9000/bam/htsnexus_test_NA12878.bam?x-id=GetObject&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=user%2F20240320%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240320T014007Z&X-Amz-Expires=1000&X-Amz-SignedHeaders=host%3Brange&X-Amz-Signature=33a75bd6363ccbfd5ce8edf7e102a5edff8ca7cee17e3c654db01a880e98072d"
```

[path-style-deprecated]: https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/
[storage]: ../../../htsget-config/README.md#resolvers
[minio]: https://min.io/
[env-variables]: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html
[virtual-addressing]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access
[path-addressing]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access
[compose]: compose.yml
[data]: ../../../data
56 changes: 56 additions & 0 deletions deploy/examples/minio/compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Example deployment: htsget-rs configured to use a MinIO server as its S3 storage backend.
version: "3"

services:
  minio:
    image: docker.io/bitnami/minio:latest
    container_name: minio
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      - MINIO_ROOT_USER=user
      - MINIO_ROOT_PASSWORD=password
      # Create a bucket called `data`.
      - MINIO_DEFAULT_BUCKETS=data
      # This is required to enable virtual-hosted style S3 addressing.
      - MINIO_DOMAIN=minio
    networks:
      default:
        aliases:
          # A network alias to the bucket is required if using virtual-hosted style addressing.
          - data.minio
    # This specifies the data which will be copied into the MinIO bucket.
    volumes:
      - ./../../../data:/tmp/data
    # An example script to copy over data for testing.
    # The script is passed in list form with a literal block scalar so that its
    # newlines are preserved regardless of how the lines are indented.
    command:
      - /bin/bash
      - "-c"
      - |
        /opt/bitnami/scripts/minio/run.sh &
        # Poll the liveness endpoint until the server is up. curl is invoked
        # directly (not inside a command substitution) so that only its exit
        # status is used and the response body is never executed as a command.
        until curl -s -f -o /dev/null http://localhost:9000/minio/health/live; do
          sleep 1
        done &&
        mc alias set minio http://minio:9000 user password;
        mc mirror /tmp/data minio/data;
        # Keep the container running after the data has been copied.
        tail -f /dev/null
  htsget-rs:
    image: ghcr.io/umccr/htsget-rs:latest
    container_name: htsget-rs
    depends_on:
      - minio
    ports:
      - "8080:8080"
      - "8081:8081"
    volumes:
      # Mount this directory so that config.toml is available under /config.
      - ./:/config
    environment:
      # Used to enable more log messages.
      - RUST_LOG=debug
      # Point to the config file that has the MinIO endpoint set.
      - HTSGET_CONFIG=/config/config.toml
      # The AWS sdk must have the same region set as the minio server.
      - AWS_REGION=us-east-1
      # The AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must also match the minio user and password.
      - AWS_ACCESS_KEY_ID=user
      - AWS_SECRET_ACCESS_KEY=password
12 changes: 12 additions & 0 deletions deploy/examples/minio/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Example htsget-rs configuration for the MinIO compose deployment in this directory.
# NOTE(review): presumably disables the local data server because URL tickets point at MinIO instead — confirm.
data_server_enabled = false

[[resolvers]]
# Match every requested id and use it unchanged as the object key ('$0' is the whole match).
regex = '.*'
substitution_string = '$0'

[resolvers.storage]
# Name of the bucket created by the MinIO service (MINIO_DEFAULT_BUCKETS=data in compose.yml).
bucket = 'data'
# The minio endpoint is set as the minio service within docker compose.
endpoint = 'http://minio:9000'
# Optionally, force path style addressing.
#path_style = true
Loading

0 comments on commit 1446dbb

Please sign in to comment.