From 16e7653a8bb8bdca8f0cfba7659e8449df43e386 Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 12:07:49 +0200 Subject: [PATCH 01/10] Update helm chart and Dockerfile Move code from the old repository to migrate service to new infrastructure --- .container/Dockerfile | 44 +++++++---- .helm/Chart.yaml | 3 +- .helm/templates/_helpers.tpl | 6 ++ .helm/templates/deployment-ingestion.yaml | 96 +++++++++++++++++++++++ .helm/templates/hpa-ingestion.yaml | 43 ++++++++++ .helm/templates/service-account.yaml | 11 +-- .helm/templates/service-ingestion.yaml | 16 ++++ .helm/values.yaml | 27 +++++++ 8 files changed, 221 insertions(+), 25 deletions(-) create mode 100644 .helm/templates/deployment-ingestion.yaml create mode 100644 .helm/templates/hpa-ingestion.yaml create mode 100644 .helm/templates/service-ingestion.yaml diff --git a/.container/Dockerfile b/.container/Dockerfile index be05ef6..05788a4 100644 --- a/.container/Dockerfile +++ b/.container/Dockerfile @@ -1,35 +1,47 @@ -FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build-env +# The `platform` argument here is required, since dotnet-sdk crashes with segmentation fault +# in case of arm64 builds, see https://github.com/dotnet/dotnet-docker/issues/4225 for details +FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build-env -ARG PROJECT_NAME="Arcane.Ingestion" +ARG INSTALL_DD_TRACER="true" +ARG TRACER_VERSION="2.49.0" +ARG TARGETARCH WORKDIR /app # Copy csproj and restore as distinct layers COPY src/*.csproj ./ -RUN dotnet restore +RUN dotnet_arch=$(test "$TARGETARCH" = "amd64" && echo "x64" || echo "$TARGETARCH") && \ + dotnet restore --runtime "linux-$dotnet_arch" # Copy everything else and build COPY src/. 
./ -RUN dotnet publish "$PROJECT_NAME.csproj" -c Release -o out +RUN dotnet_arch=$(test "$TARGETARCH" = "amd64" && echo "x64" || echo "$TARGETARCH") && \ + dotnet publish "Arcane.Stream.BlobStorage.csproj" -c Release -o out --runtime "linux-$dotnet_arch" # Build runtime image -FROM mcr.microsoft.com/dotnet/aspnet:6.0-bullseye-slim -LABEL org.opencontainers.image.source=https://github.com/SneaksAndData/arcane-ingestion +FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim -ARG TRACER_VERSION="2.32.0" -ARG PROJECT_NAME -ENV PROJECT_ASSEMBLY=$PROJECT_NAME +ARG TRACER_VERSION="2.49.0" +ARG INSTALL_DD_TRACER="true" +ARG TARGETARCH RUN apt-get update -y && apt-get install -y curl jq # Download and install the Datadog Tracer -RUN mkdir -p /opt/datadog \ - && mkdir -p /var/log/datadog \ - && curl -LO https://github.com/DataDog/dd-trace-dotnet/releases/download/v${TRACER_VERSION}/datadog-dotnet-apm_${TRACER_VERSION}_amd64.deb \ - && dpkg -i ./datadog-dotnet-apm_${TRACER_VERSION}_amd64.deb \ - && rm ./datadog-dotnet-apm_${TRACER_VERSION}_amd64.deb +RUN if [ -z "$INSTALL_DD_TRACER" ]; then \ + echo "Datadog tracer installation skipped"; \ + else \ + mkdir -p /opt/datadog \ + && echo $TARGETARCH \ + && mkdir -p /var/log/datadog \ + && curl -LO https://github.com/DataDog/dd-trace-dotnet/releases/download/v${TRACER_VERSION}/datadog-dotnet-apm_${TRACER_VERSION}_${TARGETARCH}.deb \ + && dpkg -i ./datadog-dotnet-apm_${TRACER_VERSION}_${TARGETARCH}.deb \ + && rm ./datadog-dotnet-apm_${TRACER_VERSION}_${TARGETARCH}.deb ; \ + fi; - WORKDIR /app COPY --from=build-env /app/out . 
-ENTRYPOINT "dotnet" "$PROJECT_ASSEMBLY.dll" + +USER app + +ENTRYPOINT ["dotnet", "Arcane.Stream.BlobStorage.dll"] diff --git a/.helm/Chart.yaml b/.helm/Chart.yaml index 7bf6f3b..c32104e 100644 --- a/.helm/Chart.yaml +++ b/.helm/Chart.yaml @@ -1,7 +1,8 @@ apiVersion: v2 name: arcane-ingestion description: | - Arcane is a Akka.NET-based data ingestion service that can be used to ingest data from various sources and store it in a data lake. + Arcane is an Akka.NET-based data ingestion service that can be used to ingest + data from various sources and store it in a data lake. This chart deploys the Arcane Ingestion service. type: application version: 0.0.0 diff --git a/.helm/templates/_helpers.tpl b/.helm/templates/_helpers.tpl index 6815276..dc76233 100644 --- a/.helm/templates/_helpers.tpl +++ b/.helm/templates/_helpers.tpl @@ -56,3 +56,9 @@ Create the name of the service account to use {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + +{{- define "app.tracerIngestionLabels" -}} +tags.datadoghq.com/env: {{ .Values.environment }} +tags.datadoghq.com/service: "arcane-ingestion" +tags.datadoghq.com/version: {{ .Values.image.tag }} +{{- end -}} diff --git a/.helm/templates/deployment-ingestion.yaml b/.helm/templates/deployment-ingestion.yaml new file mode 100644 index 0000000..327e906 --- /dev/null +++ b/.helm/templates/deployment-ingestion.yaml @@ -0,0 +1,96 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "app.name" . }}-ingestion + labels: + app.kubernetes.io/name: {{ template "app.name" . }} + {{- include "app.labels" $ | nindent 4 }} + {{- include "app.tracerIngestionLabels" $ | nindent 4 }} +spec: + selector: + matchLabels: + app.kubernetes.io/name: {{ template "app.name" . }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/name: {{ template "app.name" .
}} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + {{- include "app.tracerIngestionLabels" $ | nindent 8 }} + annotations: + deployment/date: {{ now }} + spec: + serviceAccountName: {{ template "app.serviceAccountName" . }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ .Values.nodes.taint }} + operator: In + values: + - {{ .Values.nodes.taintValue }} + tolerations: + - key: {{ .Values.nodes.taint }} + operator: Equal + value: {{ .Values.nodes.taintValue }} + effect: NoSchedule + containers: + - name: arcane + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - /bin/bash + - '-c' + - | + + curl --fail http://localhost:80/health + + initialDelaySeconds: 10 + timeoutSeconds: 5 + periodSeconds: 30 + successThreshold: 1 + failureThreshold: 10 + env: + - name: ASPNETCORE_ENVIRONMENT + value: {{ .Values.environment }} + - name: APPLICATION_VERSION + value: {{ .Values.image.tag }} + - name: APPLICATION_HOST_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: PROTEUS__DATADOG_SITE + value: {{ .Values.logs.datadogSite }} + - name: PROTEUS__DD_STATSD_HOST + value: {{ .Values.metrics.statsd_host }} + - name: PROTEUS__DD_STATSD_PORT + value: {{ .Values.metrics.statsd_port | quote }} + - name: PROTEUS__DEFAULT_LOG_LEVEL + value: {{ .Values.logs.logLevel | quote }} + volumeMounts: + - name: apmsocketpath + mountPath: /var/run/datadog + - name: arcane-config + mountPath: /app/appsettings.json + subPath: appsettings.json + - name: {{ template "app.name" . 
}}-secrets + mountPath: /app/secrets/ + readOnly: true + resources: + limits: + cpu: {{ .Values.limits.ingestion.cpu }} + memory: {{ .Values.limits.ingestion.memory }} + requests: + cpu: {{ .Values.limits.ingestion.cpu }} + memory: {{ .Values.limits.ingestion.memory }} + volumes: + - name: apmsocketpath + hostPath: + path: /var/run/datadog/ + - name: arcane-config + configMap: + name: {{ template "app.name" . }}-config diff --git a/.helm/templates/hpa-ingestion.yaml b/.helm/templates/hpa-ingestion.yaml new file mode 100644 index 0000000..946bb46 --- /dev/null +++ b/.helm/templates/hpa-ingestion.yaml @@ -0,0 +1,43 @@ +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ template "app.name" . }}-ingestion +spec: + behavior: + scaleDown: + policies: + - + periodSeconds: {{ .Values.hpa.scaleDown.period }} + type: Pods + value: {{ .Values.hpa.scaleDown.pods }} + - + periodSeconds: {{ .Values.hpa.scaleDown.period }} + type: Percent + value: {{ .Values.hpa.scaleDown.percent }} + stabilizationWindowSeconds: {{ .Values.hpa.scaleDown.stabilizationWindow }} + scaleUp: + policies: + - + periodSeconds: {{ .Values.hpa.scaleUp.period }} + type: Percent + value: {{ .Values.hpa.scaleUp.percent }} + - + periodSeconds: {{ .Values.hpa.scaleUp.period }} + type: Pods + value: {{ .Values.hpa.scaleUp.pods }} + stabilizationWindowSeconds: {{ .Values.hpa.scaleUp.stabilizationWindow }} + maxReplicas: {{ .Values.hpa.maxReplicas }} + metrics: + - + resource: + name: cpu + target: + averageUtilization: {{ .Values.hpa.cpuTarget }} + type: Utilization + type: Resource + minReplicas: 3 + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ template "app.name" . 
}}-ingestion \ No newline at end of file diff --git a/.helm/templates/service-account.yaml b/.helm/templates/service-account.yaml index bfeb176..62adab4 100644 --- a/.helm/templates/service-account.yaml +++ b/.helm/templates/service-account.yaml @@ -1,12 +1,7 @@ -{{- if .Values.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: - name: {{ include "app.serviceAccountName" . }} + name: {{ template "app.serviceAccountName" . }} labels: - {{- include "app.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -{{- end }} + app.kubernetes.io/name: {{ template "app.serviceAccountName" . }} + {{- include "app.labels" $ | nindent 4 }} diff --git a/.helm/templates/service-ingestion.yaml b/.helm/templates/service-ingestion.yaml new file mode 100644 index 0000000..3f6ef89 --- /dev/null +++ b/.helm/templates/service-ingestion.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "app.name" . }} + labels: + app.kubernetes.io/name: {{ template "app.name" . }} + {{- include "app.labels" $ | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 5000 + targetPort: 8080 + protocol: TCP + selector: + app.kubernetes.io/name: {{ template "app.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} diff --git a/.helm/values.yaml b/.helm/values.yaml index f62a4df..bc34c61 100644 --- a/.helm/values.yaml +++ b/.helm/values.yaml @@ -1,6 +1,33 @@ environment: "Development" replicaCount: 1 +nodes: + taint: "kubernetes.sneaksanddata.com/servicenodetype" + taintValue: "arcane" + +logs: + datadogSite: "https://http-intake.logs.datadoghq.eu" + logLevel: "INFO" + +limits: + ingestion: + cpu: '1000m' + memory: '1500Mi' + +hpa: + scaleDown: + pods: 1 + percent: 10 + period: 180 + stabilizationWindow: 300 + scaleUp: + pods: 1 + percent: 10 + period: 30 + stabilizationWindow: 120 + maxReplicas: 10 + cpuTarget: 35 + image: repository: arcane-ingestion From 88eb70b1a20c3ad78ae80fcf55272a8757adbef1 Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 13:11:53 +0200 Subject: [PATCH 02/10] Move the actual code --- src/Arcane.Ingestion.csproj | 3 +- .../JsonIngestionConfiguration.cs | 46 +++++++++++ src/Controllers/IngestionController.cs | 35 +++++++++ src/Metrics/DeclaredMetrics.cs | 12 +++ src/Program.cs | 40 +++++++++- src/Services/Base/IIngestionService.cs | 7 ++ src/Services/Streams/JsonIngestionService.cs | 64 ++++++++++++++++ src/Startup.cs | 76 +++++++++++++++++++ 8 files changed, 279 insertions(+), 4 deletions(-) create mode 100644 src/Configurations/JsonIngestionConfiguration.cs create mode 100644 src/Controllers/IngestionController.cs create mode 100644 src/Metrics/DeclaredMetrics.cs create mode 100644 src/Services/Base/IIngestionService.cs create mode 100644 src/Services/Streams/JsonIngestionService.cs create mode 100644 src/Startup.cs diff --git a/src/Arcane.Ingestion.csproj b/src/Arcane.Ingestion.csproj index 80fa1ce..09c82a7 100644 --- a/src/Arcane.Ingestion.csproj +++ b/src/Arcane.Ingestion.csproj @@ -8,7 +8,8 @@ - + + diff --git a/src/Configurations/JsonIngestionConfiguration.cs b/src/Configurations/JsonIngestionConfiguration.cs new file mode 100644 index 0000000..1dbe4b6 --- /dev/null +++ 
b/src/Configurations/JsonIngestionConfiguration.cs @@ -0,0 +1,46 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Arcane.Ingestion.Configurations; + +/// +/// Configuration for a json ingestion endpoint. +/// +[ExcludeFromCodeCoverage(Justification = "Model")] +public class JsonIngestionConfiguration +{ + /// + /// Size of an Akka MergeHub Buffer for this endpoint. + /// + public int BufferSize { get; set; } + + /// + /// Document processing rate per <see cref="ThrottleTimespan"/>. + /// + public int ThrottleDocumentLimit { get; set; } + + /// + /// Number of documents to receive before throttling kicks in. + /// + public int ThrottleDocumentBurst { get; set; } + + /// + /// Document processing rate (time). + /// + public TimeSpan ThrottleTimespan { get; set; } + + /// + /// Max number of JSON documents in a single output file. + /// + public int MaxDocumentsPerFile { get; set; } + + /// + /// Grouping interval for received records. + /// + public TimeSpan GroupingInterval { get; set; } + + /// + /// Base location to save data in. Must follow format required by underlying storage service (az, s3 etc.).
+ /// + public string IngestionSinkPath { get; set; } +} diff --git a/src/Controllers/IngestionController.cs b/src/Controllers/IngestionController.cs new file mode 100644 index 0000000..818ad93 --- /dev/null +++ b/src/Controllers/IngestionController.cs @@ -0,0 +1,35 @@ +using System.Diagnostics.CodeAnalysis; +using System.Net; +using System.Text.Json; +using Arcane.Ingestion.Services.Base; +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.Logging; + +namespace Arcane.Ingestion.Controllers +{ + [ExcludeFromCodeCoverage] + [ApiController] + [Route("[controller]")] + public class IngestionController : ControllerBase + { + private readonly ILogger logger; + private readonly IIngestionService jsonService; + + public IngestionController(ILogger logger, IIngestionService jsonService) + { + this.logger = logger; + this.jsonService = jsonService; + } + + [HttpPost("json/{source}")] + [ProducesResponseType((int)HttpStatusCode.InternalServerError)] + [ProducesResponseType((int)HttpStatusCode.BadRequest)] + public ObjectResult Ingest([FromBody] JsonDocument record, string source) + { + this.logger.LogDebug("Received record for {source}", source); + + this.jsonService.Ingest(source, record); + return this.Accepted(); + } + } +} diff --git a/src/Metrics/DeclaredMetrics.cs b/src/Metrics/DeclaredMetrics.cs new file mode 100644 index 0000000..1d1959c --- /dev/null +++ b/src/Metrics/DeclaredMetrics.cs @@ -0,0 +1,12 @@ +namespace Arcane.Ingestion.Metrics +{ + /// + /// Metrics published by Arcane. 
+ /// + public static class DeclaredMetrics + { + public const string ROWS_INCOMING = "rows.incoming"; + public const string ROWS_INGESTED = "rows.ingested"; + public const string DOCUMENTS_INGESTED = "documents.ingested"; + } +} diff --git a/src/Program.cs b/src/Program.cs index 71e7a3a..3a6ed70 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -1,5 +1,39 @@ -// See https://aka.ms/new-console-template for more information +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.AspNetCore.Hosting; +using Microsoft.Extensions.Hosting; +using Serilog; +using Snd.Sdk.Logs.Providers; +using Snd.Sdk.Logs.Providers.Configurations; -using System; +namespace Arcane.Ingestion +{ + [ExcludeFromCodeCoverage] + public class Program + { + public static int Main(string[] args) + { + Log.Logger = DefaultLoggingProvider.CreateBootstrapLogger(nameof(Arcane)); + try + { + Log.Information("Starting web host"); + CreateHostBuilder(args).Build().Run(); + return 0; + } + catch (Exception ex) + { + Log.Fatal(ex, "Host terminated unexpectedly"); + return 1; + } + finally + { + Log.CloseAndFlush(); + } + } -Console.WriteLine("Hello, World!"); \ No newline at end of file + public static IHostBuilder CreateHostBuilder(string[] args) => + Host.CreateDefaultBuilder(args) + .AddSerilogLogger(nameof(Arcane), loggerConfiguration => loggerConfiguration.Default().AddDatadog()) + .ConfigureWebHostDefaults(webBuilder => { webBuilder.UseStartup(); }); + } +} diff --git a/src/Services/Base/IIngestionService.cs b/src/Services/Base/IIngestionService.cs new file mode 100644 index 0000000..d862201 --- /dev/null +++ b/src/Services/Base/IIngestionService.cs @@ -0,0 +1,7 @@ +namespace Arcane.Ingestion.Services.Base +{ + public interface IIngestionService + { + public void Ingest(string destinationName, T row); + } +} diff --git a/src/Services/Streams/JsonIngestionService.cs b/src/Services/Streams/JsonIngestionService.cs new file mode 100644 index 0000000..3bf5293 --- /dev/null +++ 
b/src/Services/Streams/JsonIngestionService.cs @@ -0,0 +1,64 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using Akka; +using Akka.Streams; +using Akka.Streams.Dsl; +using Arcane.Framework.Sinks.Json; +using Arcane.Ingestion.Configurations; +using Arcane.Ingestion.Metrics; +using Arcane.Ingestion.Services.Base; +using Microsoft.Extensions.Options; +using Snd.Sdk.Metrics.Base; +using Snd.Sdk.Storage.Base; + +namespace Arcane.Ingestion.Services.Streams +{ + public class JsonIngestionService : IIngestionService + { + private readonly IBlobStorageService blobStorageService; + private readonly IMaterializer materializer; + private readonly JsonIngestionConfiguration serviceConfig; + private readonly MetricsService metricsService; + private readonly IRunnableGraph> graph; + private readonly Sink<(string, DateTimeOffset, JsonDocument), NotUsed> graphSink; + + public JsonIngestionService(IOptions options, IBlobStorageService blobStorageService, MetricsService metricsService, IMaterializer materializer) + { + this.blobStorageService = blobStorageService; + this.materializer = materializer; + this.serviceConfig = options.Value; + this.metricsService = metricsService; + + this.graph = this.GetGraph(); + this.graphSink = this.graph.Run(this.materializer); + } + + public void Ingest(string destinationName, JsonDocument json) + { + this.metricsService.Increment(DeclaredMetrics.DOCUMENTS_INGESTED, new SortedDictionary { { "ingestion_source", destinationName } }); + Source.Single((destinationName, DateTimeOffset.UtcNow, json)).RunWith(this.graphSink, this.materializer); + } + + private IRunnableGraph> GetGraph() + { + return MergeHub + .Source<(string, DateTimeOffset, JsonDocument)>(perProducerBufferSize: this.serviceConfig.BufferSize) + .Throttle(elements: this.serviceConfig.ThrottleDocumentLimit, + per: this.serviceConfig.ThrottleTimespan, + maximumBurst: this.serviceConfig.ThrottleDocumentBurst, + mode: ThrottleMode.Shaping) 
+ .GroupedWithin(this.serviceConfig.MaxDocumentsPerFile, this.serviceConfig.GroupingInterval) + .SelectMany(batch => batch.GroupBy(v => v.Item1)) + .Select(v => + { + var groupName = v.Key; + var groupRecords = v.Select(grp => (grp.Item2, grp.Item3)).ToList(); + this.metricsService.Gauge(DeclaredMetrics.ROWS_INGESTED, groupRecords.Count, new SortedDictionary { { "ingestion_source", groupName } }); + return (groupName, groupRecords); + }) + .To(JsonSink.Create(this.blobStorageService, this.serviceConfig.IngestionSinkPath)); + } + } +} diff --git a/src/Startup.cs b/src/Startup.cs new file mode 100644 index 0000000..98d1462 --- /dev/null +++ b/src/Startup.cs @@ -0,0 +1,76 @@ +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using Arcane.Ingestion.Configurations; +using Arcane.Ingestion.Services.Base; +using Arcane.Ingestion.Services.Streams; +using Azure.Data.Tables; +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Hosting; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.OpenApi.Models; +using Snd.Sdk.ActorProviders; +using Snd.Sdk.Kubernetes.Providers; +using Snd.Sdk.Metrics.Configurations; +using Snd.Sdk.Metrics.Providers; +using Snd.Sdk.Storage.Providers; +using Snd.Sdk.Storage.Providers.Configurations; + +namespace Arcane.Ingestion +{ + [ExcludeFromCodeCoverage] + public class Startup + { + public Startup(IConfiguration configuration) + { + this.Configuration = configuration; + } + + public IConfiguration Configuration { get; } + + public void ConfigureServices(IServiceCollection services) + { + // service config injections + services.Configure(this.Configuration.GetSection(nameof(JsonIngestionConfiguration))); + + + services.AddLocalActorSystem(); + + services.AddAzureBlob(AzureStorageConfiguration.CreateDefault()); + services.AddAzureTable(AzureStorageConfiguration.CreateDefault()); + 
services.AddDatadogMetrics(DatadogConfiguration.Default(nameof(Arcane))); + + services.AddSingleton, JsonIngestionService>(); + services.AddKubernetes(); + + services.AddHealthChecks(); + + services.AddControllers().AddJsonOptions(options => options.JsonSerializerOptions.Converters.Add(new JsonStringEnumConverter())); + services.AddSwaggerGen(c => + { + c.SwaggerDoc("v1", new OpenApiInfo { Title = "Arcane", Version = "v1" }); + }); + } + + // This method gets called by the runtime. Use this method to configure the HTTP request pipeline. + public void Configure(IApplicationBuilder app, IWebHostEnvironment env, IHostApplicationLifetime hostApplicationLifetime) + { + if (env.IsDevelopment()) + { + app.UseDeveloperExceptionPage(); + app.UseSwagger(); + app.UseSwaggerUI(c => c.SwaggerEndpoint("/swagger/v1/swagger.json", "Arcane v1")); + } + + app.UseRouting(); + + app.UseEndpoints(endpoints => + { + endpoints.MapControllers(); + endpoints.MapHealthChecks("/health"); + }); + } + } +} From 83230061d5366204054ebb572eb5a6f82311b3ff Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 13:18:23 +0200 Subject: [PATCH 03/10] Add pipelines --- .github/workflows/build.yaml | 81 ++++++++++++++++++-- .github/workflows/cleanup-repository.yaml | 35 +++++++++ .github/workflows/publish-ecr-public.yaml | 92 +++++++++++++++++++++++ .github/workflows/release.yaml | 2 +- src/Arcane.Ingestion.csproj | 2 +- 5 files changed, 202 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/cleanup-repository.yaml create mode 100644 .github/workflows/publish-ecr-public.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bb97e3c..f733785 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -5,35 +5,100 @@ on: branches: [ main ] pull_request: branches: [ main ] - + env: PROJECT_NAME: Arcane.Ingestion - + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + jobs: validate_commit: name: Validate commit 
runs-on: ubuntu-latest if: ${{ github.ref != 'refs/heads/main' }} + permissions: + pull-requests: write + contents: read steps: - uses: actions/checkout@v4 - + - name: Setup .NET uses: actions/setup-dotnet@v4.0.0 with: - dotnet-version: 6.0.x - + dotnet-version: 8.0.x + - name: Build run: dotnet build - + - name: Test working-directory: ./test run: | dotnet add package coverlet.msbuild && dotnet test ${PROJECT_NAME}.Tests.csproj --configuration Debug --runtime linux-x64 /p:CollectCoverage=true /p:CoverletOutput=Coverage/ /p:CoverletOutputFormat=lcov --logger GitHubActions - + - name: Publish Code Coverage if: ${{ github.event_name == 'pull_request' && always() }} - uses: romeovs/lcov-reporter-action@v0.3.1 + uses: romeovs/lcov-reporter-action@v0.4.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} lcov-file: ./test/Coverage/coverage.info + + build_image: + name: Build Docker Image and Helm Charts + runs-on: ubuntu-latest + needs: [ validate_commit ] + if: ${{ always() && (needs.validate_commit.result == 'success' || needs.validate_commit.result == 'skipped') }} + permissions: + contents: read + packages: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Log in to the Container registry + uses: docker/login-action@v3.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Get project version + uses: SneaksAndData/github-actions/generate_version@v0.1.9 + id: version + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{steps.version.outputs.version}} + flavor: + latest=false + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3.4.0 + with: + use: true + platforms: linux/arm64,linux/amd64 + + - name: Build and push Docker image + uses: docker/build-push-action@v6.2.0 
+ with: + context: . + file: .container/Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/arm64,linux/amd64 + + - name: Build and Push Chart + uses: SneaksAndData/github-actions/build_helm_chart@v0.1.9 + with: + application: ${{ github.event.repository.name }} + app_version: ${{ steps.meta.outputs.version }} + container_registry_user: ${{ github.actor }} + container_registry_token: ${{ secrets.GITHUB_TOKEN }} + container_registry_address: ghcr.io/sneaksanddata/ diff --git a/.github/workflows/cleanup-repository.yaml b/.github/workflows/cleanup-repository.yaml new file mode 100644 index 0000000..5809d6d --- /dev/null +++ b/.github/workflows/cleanup-repository.yaml @@ -0,0 +1,35 @@ +name: Remove old artifacts +on: +# schedule: +# - cron: '0 12 * * *' # every day at 12:00 UTC + workflow_dispatch: + +jobs: + remove_old_artifacts: + name: Remove old artifacts + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + timeout-minutes: 10 # stop the task if it takes longer + + steps: + - name: Delete old package versions of ${{ github.repository }} + uses: actions/delete-package-versions@v5.0.0 + with: + package-name: ${{ github.repository }} + package-type: container + token: ${{ secrets.GITHUB_TOKEN }} + min-versions-to-keep: 10 + delete-only-pre-release-versions: "true" + + - name: Delete old package versions of helm/${{ github.repository }} + uses: actions/delete-package-versions@v5.0.0 + with: + package-name: helm/${{ github.repository }} + package-type: container + token: ${{ secrets.GITHUB_TOKEN }} + min-versions-to-keep: 10 + delete-only-pre-release-versions: "true" diff --git a/.github/workflows/publish-ecr-public.yaml b/.github/workflows/publish-ecr-public.yaml new file mode 100644 index 0000000..6776a82 --- /dev/null +++ b/.github/workflows/publish-ecr-public.yaml @@ -0,0 +1,92 @@ +name: Publish Arcane.Stream.BlobStorage to ECR public registry +run-name: Publish 
Arcane.Stream.BlobStorage to public.ecr.aws by @${{ github.actor }} + +on: + workflow_dispatch: + inputs: + version: + description: | + Version number to publish. Defaults to the latest git tag in the repository. + This version MUST exist in the ghcr.io registry. + required: false + default: "current" + +env: + PROJECT_NAME: Arcane.Stream.BlobStorage + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + publish_image: + name: Publish Docker Image to ECR Public + runs-on: ubuntu-latest + # if: ${{ startsWith(github.ref, 'refs/tags') }} + + permissions: + contents: read + id-token: write + + steps: + + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + + - name: Get project version + uses: SneaksAndData/github-actions/generate_version@v0.1.9 + id: current_version + + - name: Set up variables + env: + VERSION: ${{ inputs.version }} + CURRENT_VERSION: ${{steps.current_version.outputs.version}} + run: | + test "$VERSION" == "current" && echo "IMAGE_VERSION=$CURRENT_VERSION" >> ${GITHUB_ENV} || echo "IMAGE_VERSION=$VERSION" >> ${GITHUB_ENV} + + - name: Import AWS Secrets + uses: hashicorp/vault-action@v3.0.0 + with: + url: https://hashicorp-vault.awsp.sneaksanddata.com/ + role: github + method: jwt + secrets: | + /secret/data/common/package-publishing/aws-ecr-public/production/container-user-public access_key | ACCESS_KEY ; + /secret/data/common/package-publishing/aws-ecr-public/production/container-user-public access_key_id | ACCESS_KEY_ID ; + id: aws_secrets + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4.0.2 + with: + aws-access-key-id: ${{ env.ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.ACCESS_KEY }} + aws-region: us-east-1 + + - name: Log in to the GitHub Container Registry + uses: docker/login-action@v3.1.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Login to Amazon ECR Public + uses: 
aws-actions/amazon-ecr-login@v2 + with: + registry-type: public + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ env.IMAGE_VERSION }} + flavor: + latest=false + + - name: Push image to ECR Public registry + uses: akhilerm/tag-push-action@v2.2.0 + with: + src: ${{ steps.meta.outputs.tags }} + dst: public.ecr.aws/s0t1h2z6/arcane/${{ github.event.repository.name }}:${{ steps.meta.outputs.version }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9ff5683..e0ea0e9 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,7 +13,7 @@ jobs: with: fetch-depth: 0 - name: Create Release - uses: SneaksAndData/github-actions/semver_release@v0.1.6 + uses: SneaksAndData/github-actions/semver_release@v0.1.9 with: major_v: 0 minor_v: 0 diff --git a/src/Arcane.Ingestion.csproj b/src/Arcane.Ingestion.csproj index 09c82a7..9025c24 100644 --- a/src/Arcane.Ingestion.csproj +++ b/src/Arcane.Ingestion.csproj @@ -1,7 +1,7 @@ - net6.0 + net8.0 10 Arcane.Ingestion Exe From a88cb794d16f172c567734163e98a8e6e53d176f Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 13:20:28 +0200 Subject: [PATCH 04/10] Build fix --- test/Arcane.Ingestion.Tests.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Arcane.Ingestion.Tests.csproj b/test/Arcane.Ingestion.Tests.csproj index 2f397c6..ca12f14 100644 --- a/test/Arcane.Ingestion.Tests.csproj +++ b/test/Arcane.Ingestion.Tests.csproj @@ -1,7 +1,7 @@ - net6.0 + net8.0 false From 5e2b93307471ec850fc9dc4490a94be53115b38d Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 13:26:38 +0200 Subject: [PATCH 05/10] Build fix --- .container/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.container/Dockerfile b/.container/Dockerfile index 
05788a4..87ab9cd 100644 --- a/.container/Dockerfile +++ b/.container/Dockerfile @@ -16,7 +16,7 @@ RUN dotnet_arch=$(test "$TARGETARCH" = "amd64" && echo "x64" || echo "$TARGETARC # Copy everything else and build COPY src/. ./ RUN dotnet_arch=$(test "$TARGETARCH" = "amd64" && echo "x64" || echo "$TARGETARCH") && \ - dotnet publish "Arcane.Stream.BlobStorage.csproj" -c Release -o out --runtime "linux-$dotnet_arch" + dotnet publish "Arcane.Ingestion.csproj" -c Release -o out --runtime "linux-$dotnet_arch" # Build runtime image FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim @@ -44,4 +44,4 @@ COPY --from=build-env /app/out . USER app -ENTRYPOINT ["dotnet", "Arcane.Stream.BlobStorage.dll"] +ENTRYPOINT ["dotnet", "Arcane.Ingestion.dll"] From bda7c8813b084a253cf192725a7cef60356b1634 Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 15:02:58 +0200 Subject: [PATCH 06/10] Fix in values.yaml --- .helm/templates/configmap-appsettings.yaml | 16 +++++---- .helm/templates/deployment-ingestion.yaml | 26 +++++++------- .helm/values.yaml | 36 ++++++++++++++++++-- src/Services/Streams/JsonIngestionService.cs | 4 +-- 4 files changed, 58 insertions(+), 24 deletions(-) diff --git a/.helm/templates/configmap-appsettings.yaml b/.helm/templates/configmap-appsettings.yaml index 183243c..b870f09 100644 --- a/.helm/templates/configmap-appsettings.yaml +++ b/.helm/templates/configmap-appsettings.yaml @@ -8,12 +8,14 @@ metadata: data: appsettings.json: | { - "Logging": { - "LogLevel": { - "Default": "Information", - "Microsoft.AspNetCore": "Warning" - } + "AllowedHosts": "*", + "JsonIngestionConfiguration": { + "BufferSize": {{ .Values.settings.bufferSize }}, + "ThrottleDocumentLimit": {{ .Values.settings.throttleDocumentLimit }}, + "ThrottleDocumentBurst": {{ .Values.settings.throttleDocumentBurst }}, + "ThrottleTimespan": {{ .Values.settings.throttleTimespan | quote }}, + "MaxDocumentsPerFile": {{ .Values.settings.maxDocumentsPerFile }}, + "GroupingInterval": {{ 
.Values.settings.groupingInterval | quote }}, + "IngestionSinkPath": {{ .Values.settings.ingestionSinkPath | quote }} }, - "AllowedHosts": "*" } - diff --git a/.helm/templates/deployment-ingestion.yaml b/.helm/templates/deployment-ingestion.yaml index 327e906..a814df2 100644 --- a/.helm/templates/deployment-ingestion.yaml +++ b/.helm/templates/deployment-ingestion.yaml @@ -65,21 +65,21 @@ spec: fieldPath: metadata.name - name: PROTEUS__DATADOG_SITE value: {{ .Values.logs.datadogSite }} - - name: PROTEUS__DD_STATSD_HOST - value: {{ .Values.metrics.statsd_host }} - - name: PROTEUS__DD_STATSD_PORT - value: {{ .Values.metrics.statsd_port | quote }} - name: PROTEUS__DEFAULT_LOG_LEVEL value: {{ .Values.logs.logLevel | quote }} + {{- if .Values.jobTemplateSettings.extraEnvFrom }} + envFrom: + {{- with .Values.jobTemplateSettings.extraEnvFrom }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- end }} volumeMounts: - - name: apmsocketpath - mountPath: /var/run/datadog - name: arcane-config mountPath: /app/appsettings.json subPath: appsettings.json - - name: {{ template "app.name" . }}-secrets - mountPath: /app/secrets/ - readOnly: true + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 8 }} + {{- end }} resources: limits: cpu: {{ .Values.limits.ingestion.cpu }} @@ -88,9 +88,11 @@ spec: cpu: {{ .Values.limits.ingestion.cpu }} memory: {{ .Values.limits.ingestion.memory }} volumes: - - name: apmsocketpath - hostPath: - path: /var/run/datadog/ - name: arcane-config configMap: name: {{ template "app.name" . }}-config + {{- if .Values.extraVolumes }} + {{- with .Values.extraVolumes }} + {{- toYaml . 
| nindent 6 }} + {{- end }} + {{- end }} diff --git a/.helm/values.yaml b/.helm/values.yaml index bc34c61..2ee539c 100644 --- a/.helm/values.yaml +++ b/.helm/values.yaml @@ -43,6 +43,36 @@ serviceAccount: # If not set and create is true, a name is generated using the fullname template name: "" -resources: - cpu: 100m - memory: 128Mi +resources: { } + + +settings: + bufferSize: 16 + throttleDocumentLimit: 256 + throttleDocumentBurst: 1024 + throttleTimespan: "0.00:00:01" + maxDocumentsPerFile: 1024 + groupingInterval: "0.00:00:30" + ingestionSinkPath: "ingestion@" + +# Extra volumes to add to the streaming job +extraVolumeMounts: [] +# Example: +# +# - mountPath: /data +# name: data-volume + +# Extra volumes to add to the streaming job +extraVolumes: [] +# Example: +# +# - mountPath: /data +# name: data-volume + +# Extra environment variables referencing a ConfigMap or Secret +extraEnvFrom: [] +# Example: +# +# envFrom: +# - configMapRef: +# name: custom-api-access-token diff --git a/src/Services/Streams/JsonIngestionService.cs b/src/Services/Streams/JsonIngestionService.cs index 3bf5293..381a32a 100644 --- a/src/Services/Streams/JsonIngestionService.cs +++ b/src/Services/Streams/JsonIngestionService.cs @@ -17,14 +17,14 @@ namespace Arcane.Ingestion.Services.Streams { public class JsonIngestionService : IIngestionService { - private readonly IBlobStorageService blobStorageService; + private readonly IBlobStorageWriter blobStorageService; private readonly IMaterializer materializer; private readonly JsonIngestionConfiguration serviceConfig; private readonly MetricsService metricsService; private readonly IRunnableGraph> graph; private readonly Sink<(string, DateTimeOffset, JsonDocument), NotUsed> graphSink; - public JsonIngestionService(IOptions options, IBlobStorageService blobStorageService, MetricsService metricsService, IMaterializer materializer) + public JsonIngestionService(IOptions options, IBlobStorageWriter blobStorageService, MetricsService metricsService, 
IMaterializer materializer) { this.blobStorageService = blobStorageService; this.materializer = materializer; From 717f50e0f168dcc746e5e58c03138c08ded63445 Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 15:08:55 +0200 Subject: [PATCH 07/10] Fix extraenvFrom --- .helm/templates/deployment-ingestion.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.helm/templates/deployment-ingestion.yaml b/.helm/templates/deployment-ingestion.yaml index a814df2..d2b4944 100644 --- a/.helm/templates/deployment-ingestion.yaml +++ b/.helm/templates/deployment-ingestion.yaml @@ -67,9 +67,9 @@ spec: value: {{ .Values.logs.datadogSite }} - name: PROTEUS__DEFAULT_LOG_LEVEL value: {{ .Values.logs.logLevel | quote }} - {{- if .Values.jobTemplateSettings.extraEnvFrom }} + {{- if .Values.extraEnvFrom }} envFrom: - {{- with .Values.jobTemplateSettings.extraEnvFrom }} + {{- with .Values.extraEnvFrom }} {{- toYaml . | nindent 10 }} {{- end }} {{- end }} From 6347ec3a1781382a92f4c36434472fe246213f75 Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 15:11:49 +0200 Subject: [PATCH 08/10] Remove service duplicate --- .helm/templates/service-ingestion.yaml | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 .helm/templates/service-ingestion.yaml diff --git a/.helm/templates/service-ingestion.yaml b/.helm/templates/service-ingestion.yaml deleted file mode 100644 index 3f6ef89..0000000 --- a/.helm/templates/service-ingestion.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ template "app.name" . }} - labels: - app.kubernetes.io/name: {{ template "app.name" . }} - {{- include "app.labels" $ | nindent 4 }} -spec: - type: ClusterIP - ports: - - port: 5000 - targetPort: 8080 - protocol: TCP - selector: - app.kubernetes.io/name: {{ template "app.name" . 
}} - app.kubernetes.io/instance: {{ .Release.Name }} From 6a315eb21dd29ee8e805b09398f920fc8d2206fc Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 16:20:06 +0200 Subject: [PATCH 09/10] Bump ESD SDK version --- src/Arcane.Ingestion.csproj | 7 +++++++ src/Startup.cs | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/Arcane.Ingestion.csproj b/src/Arcane.Ingestion.csproj index 9025c24..464a376 100644 --- a/src/Arcane.Ingestion.csproj +++ b/src/Arcane.Ingestion.csproj @@ -9,7 +9,14 @@ + + + + + Always + + diff --git a/src/Startup.cs b/src/Startup.cs index 98d1462..7a5b87f 100644 --- a/src/Startup.cs +++ b/src/Startup.cs @@ -42,6 +42,9 @@ public void ConfigureServices(IServiceCollection services) services.AddAzureTable(AzureStorageConfiguration.CreateDefault()); services.AddDatadogMetrics(DatadogConfiguration.Default(nameof(Arcane))); + var env = AmazonStorageConfiguration.CreateFromEnv(); + services.AddAwsS3Writer(env); + services.AddSingleton, JsonIngestionService>(); services.AddKubernetes(); From 0b1d3840785f99815effb2c3c71e2834635b8df3 Mon Sep 17 00:00:00 2001 From: Vitalii Savitskii Date: Thu, 18 Jul 2024 16:35:11 +0200 Subject: [PATCH 10/10] Update port settings --- .helm/templates/deployment-ingestion.yaml | 2 +- .helm/templates/service.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.helm/templates/deployment-ingestion.yaml b/.helm/templates/deployment-ingestion.yaml index d2b4944..14a61c2 100644 --- a/.helm/templates/deployment-ingestion.yaml +++ b/.helm/templates/deployment-ingestion.yaml @@ -47,7 +47,7 @@ spec: - '-c' - | - curl --fail http://localhost:80/health + curl --fail http://localhost:8080/health initialDelaySeconds: 10 timeoutSeconds: 5 diff --git a/.helm/templates/service.yaml b/.helm/templates/service.yaml index 1d66d1f..558224e 100644 --- a/.helm/templates/service.yaml +++ b/.helm/templates/service.yaml @@ -7,7 +7,7 @@ metadata: spec: type: ClusterIP ports: - - port: 80 + - port: 8080 
targetPort: http protocol: TCP name: http