Commit

chore: next major version (#50)
* feat!: added merge command and changed previous root command to impor… (#49)

* feat!: added merge command and changed previous root command to import sub-command

* docs: updated merge command

* fix: use structured, configurably-named checkpoint file

* feat: limit individual bundle size

* test: more logging and fixed tests

* fix: added SleepAfterImport to possibly address MERGE conflicts

* chore(release): 2.0.0

## [2.0.0](v1.3.2...v2.0.0) (2024-05-28)

### ⚠ BREAKING CHANGES

* added merge command and changed previous root command to impor… (#49)

### Features

* added merge command and changed previous root command to impor… ([#49](#49)) ([3c8fcbc](3c8fcbc))
* limit individual bundle size ([4ea3f83](4ea3f83))

### Bug Fixes

* added SleepAfterImport to possibly address MERGE conflicts ([e16fd4a](e16fd4a))
* use structured, configurably-named checkpoint file ([650868f](650868f))

### Miscellaneous Chores

* **deps:** update all non-major dependencies ([#46](#46)) ([4e3b2e0](4e3b2e0))
* **deps:** update mcr.microsoft.com/dotnet/sdk:8.0.204-jammy docker digest to 803a3c5 ([#39](#39)) ([bb6c947](bb6c947))
* **deps:** update miracum/.github action to v1.8.3 ([#47](#47)) ([7557d5d](7557d5d))

---------

Co-authored-by: semantic-release-bot <[email protected]>
chgl and semantic-release-bot authored May 28, 2024
1 parent 7557d5d commit 269fa8b
Showing 13 changed files with 50,481 additions and 138 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/ci.yaml
@@ -2,7 +2,9 @@ name: ci

on:
push:
branches: [master]
branches:
- master
- next
release:
types: [created]
pull_request:
2 changes: 1 addition & 1 deletion Dockerfile
@@ -8,7 +8,7 @@ COPY src/PathlingS3Import/packages.lock.json .
RUN dotnet restore --locked-mode
COPY . .

ARG VERSION=1.3.2
ARG VERSION=2.0.0
RUN dotnet publish \
-c Release \
-p:Version=${VERSION} \
21 changes: 18 additions & 3 deletions README.md
@@ -10,7 +10,7 @@ Tool for automatically [$import'ing](https://pathling.csiro.au/docs/server/opera
See the help text of the command by simply running:

```sh
docker run --rm -it ghcr.io/miracum/pathling-s3-import:v1.3.2
docker run --rm -it ghcr.io/miracum/pathling-s3-import:v2.0.0
```

## Development
@@ -30,8 +30,10 @@ dotnet tool restore

Start the tool

### import

```sh
dotnet run --project src/PathlingS3Import/ -- \
dotnet run --project src/PathlingS3Import/ -- import \
--s3-endpoint=http://localhost:9000 \
--pathling-server-base-url=http://localhost:8082/fhir \
--s3-access-key=admin \
@@ -46,7 +48,7 @@ dotnet run --project src/PathlingS3Import/ -- \
Or to test importing from a checkpoint:

```sh
dotnet run --project src/PathlingS3Import/ -- \
dotnet run --project src/PathlingS3Import/ -- import \
--s3-endpoint=http://localhost:9000 \
--pathling-server-base-url=http://localhost:8082/fhir \
--s3-access-key=admin \
@@ -57,6 +59,19 @@ dotnet run --project src/PathlingS3Import/ -- \
--dry-run=false
```

### merge

```sh
dotnet run --project src/PathlingS3Import/ -- merge \
--s3-endpoint=http://localhost:9000 \
--s3-access-key=admin \
--s3-secret-key=miniopass \
--s3-bucket-name=fhir \
--s3-object-name-prefix=staging/ \
--max-merged-bundle-size=10 \
--dry-run=true
```
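
Judging by its name and the example value above, `--max-merged-bundle-size` caps how many resources end up in each merged bundle. As a rough, purely illustrative sketch of that idea (an editor's example; `MergeSketch` and `MergeLines` are made-up names, and this is not the tool's actual implementation), size-capped merging of NDJSON lines could look like this:

```csharp
using System.Collections.Generic;

// Illustration only: groups NDJSON lines (one FHIR resource per line) into
// merged chunks of at most maxMergedBundleSize lines each.
public static class MergeSketch
{
    public static IEnumerable<string> MergeLines(
        IEnumerable<string> ndjsonLines,
        int maxMergedBundleSize
    )
    {
        var chunk = new List<string>(maxMergedBundleSize);
        foreach (var line in ndjsonLines)
        {
            chunk.Add(line);
            if (chunk.Count == maxMergedBundleSize)
            {
                yield return string.Join('\n', chunk);
                chunk.Clear();
            }
        }

        // Emit whatever is left over as a final, smaller chunk.
        if (chunk.Count > 0)
        {
            yield return string.Join('\n', chunk);
        }
    }
}
```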

### Run E2E Tests

```sh
31 changes: 26 additions & 5 deletions compose.yaml
@@ -38,26 +38,47 @@ services:
mc alias set minio http://minio:9000 $${MINIO_SERVER_ACCESS_KEY} $${MINIO_SERVER_SECRET_KEY}
mc mb minio/fhir
mc cp /tmp/data/bundle-*.ndjson minio/fhir/staging/Patient/
mc cp /tmp/data/bundle-*.ndjson /tmp/data/pathling-s3-importer-last-imported.txt minio/fhir/staging-with-checkpoint/Patient/
mc cp /tmp/data/bundle-*.ndjson /tmp/data/_last-import-checkpoint.json minio/fhir/staging-with-checkpoint/Patient/
depends_on:
wait-for-minio:
condition: service_completed_successfully
volumes:
- $PWD/hack/data/:/tmp/data/:ro

pathling:
image: docker.io/aehrc/pathling:6.4.2@sha256:9b8ee32d4b8bb40192d6bf25814492a616153a0df15d178c286db9ec80c1c85e
image: docker.io/aehrc/pathling:7.0.1@sha256:70177a4eb7a20a5edba7a4957ac6cd245c29e3c306e98c5de59fe2974c1f71b8
ipc: none
security_opt:
- "no-new-privileges:true"
cap_drop:
- ALL
privileged: false
environment:
pathling.storage.warehouseUrl: s3://fhir
pathling.import.allowableSources: s3://fhir/staging
JAVA_TOOL_OPTIONS: |
-Xmx18g
-Xss64m
-Duser.timezone=UTC
--add-exports=java.base/sun.nio.ch=ALL-UNNAMED
--add-opens=java.base/java.net=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/java.util=ALL-UNNAMED
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
pathling.storage.warehouseUrl: s3a://fhir
pathling.import.allowableSources: s3a://fhir/staging
pathling.terminology.enabled: false
pathling.terminology.serverUrl: http://localhost:8080/i-dont-exist
fs.s3a.endpoint: "http://minio:9000"
fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"
fs.s3a.access.key: "admin"
fs.s3a.secret.key: "miniopass"
fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
fs.s3a.path.style.access: "true"
spark.sql.parquet.compression.codec: "zstd"
spark.io.compression.codec: "zstd"
parquet.compression.codec.zstd.level: "9"
spark.serializer: "org.apache.spark.serializer.KryoSerializer"
spark.master: "local[4]"
spark.executor.memory: 4g
spark.driver.memory: 4g
ports:
- "8082:8080"
depends_on:
1 change: 1 addition & 0 deletions hack/data/_last-import-checkpoint.json
@@ -0,0 +1 @@
{"CreatedAt":"2024-05-27T20:29:16.4983552+00:00","LastImportedObjectUrl":"s3://fhir/staging-with-checkpoint/Patient/bundle-1708690114016.ndjson"}
50,000 changes: 50,000 additions & 0 deletions hack/data/bundle-1708690114045.ndjson

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion hack/data/pathling-s3-importer-last-imported.txt

This file was deleted.

3 changes: 2 additions & 1 deletion src/PathlingS3Import.Tests.E2E/Tests.cs
@@ -26,6 +26,7 @@ public async Task StartImportTool_WithRunningPathlingServerAndMinio_ShouldCreate

string[] args =
[
"import",
"--s3-endpoint=http://host.docker.internal:9000",
$"--pathling-server-base-url={pathlingServerBaseUrl}",
"--s3-access-key=admin",
@@ -81,6 +82,6 @@ public async Task StartImportTool_WithRunningPathlingServerAndMinio_ShouldCreate
);

response.Should().NotBeNull();
response!.Total.Should().Be(7);
response!.Total.Should().Be(50007);
}
}
120 changes: 120 additions & 0 deletions src/PathlingS3Import/Commands/CommandBase.cs
@@ -0,0 +1,120 @@
using System.Reflection;
using System.Text.RegularExpressions;
using DotMake.CommandLine;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Console;
using Polly;
using Polly.Retry;

namespace PathlingS3Import;

public abstract partial class CommandBase
{
[GeneratedRegex(".*bundle-(?<timestamp>\\d*)\\.ndjson$")]
protected static partial Regex BundleObjectNameRegex();

[CliOption(Description = "The S3 endpoint URI", Name = "--s3-endpoint")]
public Uri? S3Endpoint { get; set; }

[CliOption(Description = "The S3 access key", Name = "--s3-access-key")]
public string? S3AccessKey { get; set; }

[CliOption(Description = "The S3 secret key", Name = "--s3-secret-key")]
public string? S3SecretKey { get; set; }

[CliOption(
Description = "The name of the bucket containing the resources to import",
Name = "--s3-bucket-name"
)]
public string? S3BucketName { get; set; } = "fhir";

[CliOption(
Description = "The S3 object name prefix. Corresponds to kafka-fhir-to-server's `S3_OBJECT_NAME_PREFIX`",
Name = "--s3-object-name-prefix"
)]
public string? S3ObjectNamePrefix { get; set; } = "";

[CliOption(
Description = "If enabled, list and read all objects but don't invoke the import operation or store the progress.",
Name = "--dry-run"
)]
public bool IsDryRun { get; set; } = false;

[CliOption(
Description = "If enabled, push metrics about the import to the specified Prometheus PushGateway.",
Name = "--enable-metrics"
)]
public bool IsMetricsEnabled { get; set; } = false;

[CliOption(
Description = "Endpoint URL for the Prometheus PushGateway.",
Name = "--pushgateway-endpoint",
Required = false
)]
public Uri? PushGatewayEndpoint { get; set; }

[CliOption(
Description = "Prometheus PushGateway job name.",
Name = "--pushgateway-job-name",
Required = false
)]
public string PushGatewayJobName { get; set; } =
Assembly.GetExecutingAssembly().GetName().Name!;

[CliOption(
Description = "Prometheus PushGateway job instance.",
Name = "--pushgateway-job-instance",
Required = false
)]
public string? PushGatewayJobInstance { get; set; }

[CliOption(
Description = "Value for the `Authorization` header",
Name = "--pushgateway-auth-header",
Required = false
)]
public string? PushGatewayAuthHeader { get; set; }

public ILoggerFactory LogFactory { get; set; }

public ResiliencePipeline RetryPipeline { get; set; }

protected CommandBase()
{
LogFactory = LoggerFactory.Create(builder =>
builder.AddSimpleConsole(options =>
{
options.IncludeScopes = true;
options.SingleLine = true;
options.ColorBehavior = LoggerColorBehavior.Disabled;
options.TimestampFormat = "yyyy-MM-dd HH:mm:ss ";
})
);

var log = LogFactory.CreateLogger<CommandBase>();

var retryOptions = new RetryStrategyOptions
{
ShouldHandle = new PredicateBuilder().Handle<Exception>(),
BackoffType = DelayBackoffType.Exponential,
UseJitter = true, // Adds a random factor to the delay
MaxRetryAttempts = 10,
Delay = TimeSpan.FromSeconds(10),
OnRetry = args =>
{
log.LogWarning(
args.Outcome.Exception,
"Retrying. Attempt: {AttemptNumber}. Duration: {Duration}.",
args.AttemptNumber,
args.Duration
);
// Event handlers can be asynchronous; here, we return an empty ValueTask.
return default;
}
};

RetryPipeline = new ResiliencePipelineBuilder()
.AddRetry(retryOptions) // Add retry using the default options
.Build(); // Builds the resilience pipeline
}
}
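
As a quick orientation for readers of this new base class, the snippet below is an editor's sketch (not code from this repository; `ExampleCommand` is made up) showing how a derived command could use the `BundleObjectNameRegex()` helper and the Polly `RetryPipeline` configured in the constructor:

```csharp
using System;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;

namespace PathlingS3Import;

// Hypothetical subclass for illustration; the real import/merge commands are defined elsewhere.
public class ExampleCommand : CommandBase
{
    public async Task RunAsync(CancellationToken cancellationToken)
    {
        var log = LogFactory.CreateLogger<ExampleCommand>();

        // The named "timestamp" group captures the numeric part of a bundle object name.
        var match = BundleObjectNameRegex().Match("staging/Patient/bundle-1708690114045.ndjson");
        if (match.Success)
        {
            log.LogInformation("Bundle timestamp: {Timestamp}", match.Groups["timestamp"].Value);
        }

        // Polly v8 pipeline: retries the delegate with the exponential backoff configured above.
        await RetryPipeline.ExecuteAsync(
            async ct =>
            {
                // Placeholder for real work, e.g. an S3 request or a Pathling $import call.
                await Task.Delay(TimeSpan.FromMilliseconds(10), ct);
            },
            cancellationToken
        );
    }
}
```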