helmfile/cloud-sdk/helmfile.yaml

environments:
  amazon-cicd:
    secrets:
      - secrets/influxdb.yaml
      - secrets/sso.yaml
      - secrets/alertmanager.yaml
      - secrets/actions-runner-controller.yaml
      - secrets/ucentral-ap-firmware-logstash.yaml
      - secrets/core-dump-handler.yaml
    values:
    - eks:
        clusterName: tip-wlan-main
        region: ap-south-1
        dnsRegion: us-east-1
        accountID: 289708231103
        hostedZoneId: cicd
        certificateARNCICD: arn:aws:acm:ap-south-1:289708231103:certificate/2cc8c764-11fd-411d-bf7d-a93f488f3f6c
        certificateARNLab: arn:aws:acm:ap-south-1:289708231103:certificate/5281be14-c5e6-45c5-8d5c-e8660c76fbcb
    - monitoring:
        receiver: "slack-notifications"
        namespace: monitoring
        publicNamespaces: ['openwifi-qa01', 'openwifi-qa02', 'openwifi-dev01', 'openwifi-demo']
        clusterName: TIP WLAN CI/CD
    - domain: lab.wlan.tip.build
    - hostSuffix: ".lab.wlan.tip.build"
    - storageClass: gp2
    - autoscaler:
        #TODO:
        #enabled: true
        enabled: false
    - ingress:
        enabled: true
    - elastic:
        #TODO:
        #enabled: true
        enabled: false
    - kibana:
        #TODO:
        #enabled: true
        enabled: false
    - logstash:
        #TODO:
        #enabled: true
        enabled: false
    - prometheus:
        enabled: true
    - k8s-dashboard:
        enabled: true
    - metrics-server:
        enabled: true
    - external-dns:
        enabled: true
    - alb-ingress:
        enabled: true
    - node-termination-handler:
        enabled: true
    - influxdb:
        #skip this one for now
        enabled: false
    - actions-runner-controller:
        enabled: true
    - cert-manager:
        enabled: true
    - calico:
        enabled: true
    - core-dump-handler:
        #skip this one now
        enabled: false
  amazon-qa2:
    secrets:
      - secrets/influxdb.yaml
      - secrets/sso.yaml
      - secrets/alertmanager.yaml
      - secrets/actions-runner-controller.yaml
      - secrets/ucentral-ap-firmware-logstash.yaml
      - secrets/core-dump-handler.yaml
    values:
    - eks:
        clusterName: tip-wlan-qa
        #region: ap-south-1
        region: us-east-2
        dnsRegion: us-east-1
        accountID: 289708231103
        hostedZoneId: cicd
        # note these are both the same.
        # there isn't a lab cert in this region
        certificateARNCICD: arn:aws:acm:us-east-2:289708231103:certificate/299d7444-acc4-46c2-ae83-40d2cd5f49be
        certificateARNLab: arn:aws:acm:us-east-2:289708231103:certificate/299d7444-acc4-46c2-ae83-40d2cd5f49be
    - monitoring:
        namespace: monitoring
        receiver: "null"
        publicNamespaces: ['openwifi-qa03', 'openwifi-dev03']
        clusterName: TIP QA
    - domain: lab.wlan.tip.build
    - hostSuffix: "-qa02.cicd.lab.wlan.tip.build"
    - storageClass: gp2
    - autoscaler:
        #TODO:
        #enabled: true
        enabled: false
    - ingress:
        enabled: true
    - elastic:
        #TODO:
        #enabled: true
        enabled: false
    - kibana:
        #TODO:
        #enabled: true
        enabled: false
    - logstash:
        #TODO:
        #enabled: true
        enabled: false
    - prometheus:
        enabled: true
    - k8s-dashboard:
        enabled: true
    - metrics-server:
        enabled: true
    - external-dns:
        enabled: true
    - alb-ingress:
        enabled: true
    - node-termination-handler:
        enabled: false
    - influxdb:
        #skip this one for now
        enabled: false
    - actions-runner-controller:
        enabled: false
    - cert-manager:
        enabled: true
    - calico:
        enabled: false
    - core-dump-handler:
        #skip this one now
        enabled: false

---

repositories:
- name: stable
  url: https://charts.helm.sh/stable
- name: kokuwa
  url: https://kokuwaio.github.io/helm-charts
- name: nginx
  url: https://kubernetes.github.io/ingress-nginx
- name: eks
  url: https://aws.github.io/eks-charts
- name: elastic
  url: https://helm.elastic.co
- name: kubernetes-dashboard
  url: https://kubernetes.github.io/dashboard/
- name: autoscaler
  url: https://kubernetes.github.io/autoscaler/
- name: bitnami
  url: https://charts.bitnami.com/bitnami
- name: influxdata
  url: https://helm.influxdata.com
- name: actions-runner-controller
  url: https://actions-runner-controller.github.io/actions-runner-controller
- name: jetstack
  url: https://charts.jetstack.io
- name: prometheus-community
  url: https://prometheus-community.github.io/helm-charts
- name: projectcalico
  url: https://projectcalico.docs.tigera.io/charts
- name: braedon
  url: https://braedon.github.io/helm
- name: core-dump-handler
  url: https://ibm.github.io/core-dump-handler

helmDefaults:
  force: false
  timeout: 300
  # dont seem to work
  # wait: false
  # recreatePods: true
  # verify: true

templates:
  default: &default
    namespace: kube-system
    missingFileHandler: Warn
  cluster-autoscaler: &cluster-autoscaler
    values:
    - envs/common/cluster-autoscaler.yaml.gotmpl
  external-dns: &external-dns
    values:
    - envs/common/external-dns.yaml.gotmpl

# core setup
releases:
- name: cluster-autoscaler
  condition: autoscaler.enabled
  <<: *default
  <<: *cluster-autoscaler
  chart: autoscaler/cluster-autoscaler
  version: 9.11.0
  labels:
    role: setup
    group: system
    app: autoscaler

- name: external-dns
  condition: external-dns.enabled
  <<: *default
  <<: *external-dns
  chart: bitnami/external-dns
  version: 6.12.2
  labels:
    role: setup
    group: system
    app: external-dns

- name: nginx-ingress
  condition: ingress.enabled
  <<: *default
  chart: nginx/ingress-nginx
  version: 4.8.2
  labels:
    role: setup
    group: system
    app: ingress
  values:
  - controller:
      ingressClassResource:
        name: nginx-sso
      ingressClass: nginx-sso
      service:
        annotations:
          service.beta.kubernetes.io/aws-load-balancer-ssl-cert: {{ .Environment.Values.eks.certificateARNLab }}
          service.beta.kubernetes.io/aws-load-balancer-ssl-ports: https
          service.beta.kubernetes.io/aws-load-balancer-type: elb
          service.beta.kubernetes.io/aws-load-balancer-backend-protocol: tcp
          service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
        targetPorts:
          http: http
          https: http
      publishService:
        enabled: true
      metrics:
        enabled: true
        serviceMonitor:
          enabled: true
          additionalLabels:
            release: prometheus-operator
  - defaultBackend:
      enabled: true

# monitoring
- name: prometheus-operator-crds
  chart: prometheus-community/prometheus-operator-crds
  condition: prometheus.enabled
  version: 8.0.1
  labels:
    role: setup
    group: monitoring
    app: prometheus-operator
    crd: prometheus-operator-crd

- name: prometheus-operator
  condition: prometheus.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: prometheus-community/kube-prometheus-stack
  version: 55.8.1
  labels:
    role: setup
    group: monitoring
    app: prometheus-operator
  values:
  - nameOverride: prometheus-operator
  - prometheus:
      enabled: true
      prometheusSpec:
        retention: 31d
        resources:
          requests:
            memory: 1400Mi
          limits:
            memory: 3000Mi
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: gp2
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 50Gi
      ingress:
        enabled: true
        ingressClassName: nginx-sso
        annotations:
          nginx.ingress.kubernetes.io/auth-type: basic
          nginx.ingress.kubernetes.io/auth-secret: k8s-dashboard-basic-auth
          nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
          #nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
          #nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
          #kubernetes.io/ingress.class: nginx-sso
        hosts:
        - prometheus{{ .Environment.Values.hostSuffix }}
  - additionalPrometheusRulesMap:
      testbed:
        groups:
        - name: Testbed alerts
          rules:
          - alert: Waiting pod
            expr: sum(kube_pod_container_status_waiting_reason{namespace=~".*nola.*", reason!="ContainerCreating"}) by (namespace, pod) > 0
            for: 5m
            labels:
              severity: warning
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is in waiting state
              description: The pod {{`{{ $labels.namespace }}/{{ $labels.pod }}`}} is in waiting state for at least 5 minutes. Please check for image pull issues.
          - alert: Node not ready
            expr: sum(kube_node_status_condition{condition="Ready", status!="true"}) by (node) > 0
            for: 60m
            labels:
              severity: error
              area: testbed
              node: "{{`{{ $labels.node }}`}}"
            annotations:
              title: Node {{`{{ $labels.node }}`}} not becoming ready
              description: The cluster node {{`{{ $labels.node }}`}} is not getting ready since 60 minutes. Please contact the cluster administrators.
          - alert: Pod OOM killed
            expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
            for: 0m
            labels:
              severity: error
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
              reason: "{{`{{ $labels.reason }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been OOM killed
              description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been killed due to reaching its memory limit. Investigate the memory usage or increase the limit to prevent this.
          - alert: Pod exited with a segfault
            expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and min_over_time(kube_pod_container_status_last_terminated_exitcode[10m]) == 139
            for: 0m
            labels:
              severity: error
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
              reason: "{{`{{ $labels.reason }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segfault
              description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segmentation fault, please examine the coredump.
          - alert: Node low on memory
            expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
            for: 1m
            labels:
              severity: warning
              area: testbed
            annotations:
              title: Node {{`{{ $labels.instance }}`}} has very little memory capacity left
              description: The cluster node {{`{{ $labels.instance }}`}} has less than 5% memory available.
          - alert: Pod stuck in crash loop
            expr: increase(kube_pod_container_status_restarts_total[1m]) > 3
            for: 2m
            labels:
              severity: error
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is crash looping
              description: "The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is crash looping. Please check its logs to see what is going on."
          - alert: Pod restarted many times
            expr: (sum(increase(kube_pod_container_status_restarts_total[10m])) by (container)) > 0
            labels:
              severity: warning
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was restarted many times
              description: "The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was restarted more than 5 times. Please check its logs to see what is going on."
          - alert: PVC running out of space
            expr: kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes * 100 < 20
            labels:
              severity: warning
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              persistentvolumeclaim: "{{`{{ $labels.persistentvolumeclaim }}`}}"
            annotations:
              title: PVC *{{`{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}`}}* has less than 20% free storage
              description: "The PVC *{{`{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}`}}* only has {{`{{ $value }}`}}% capacity left. Please increase its size or clean it up."
# Not doing ES (currently)
#          - alert: ElasticSearch new assert_violation errors found
#            expr: rate(assert_violation_errors_hits[1m]) > 0
#            labels:
#              severity: warning
#              area: testbed
#              service: elasticsearch
#              namespace: "{{`{{ $labels.namespace }}`}}"
#            annotations:
#              title: New ElasticSearch logs found with Assertion violation (WIFI-9824)
#              description: New ElasticSearch logs found with Assertion violation (WIFI-9824)

  - grafana:
      #image:
        #repository: grafana/grafana
        #tag: 8.5.13
      grafana.ini:
        users:
          viewers_can_edit: true
        auth:
          disable_login_form: true
          disable_signout_menu: true
        auth.anonymous:
          enabled: true
          org_role: Viewer
      testFramework:
        enabled: false
      ingress:
        enabled: true
        ingressClassName: nginx-sso
        annotations:
          nginx.ingress.kubernetes.io/auth-type: basic
          nginx.ingress.kubernetes.io/auth-secret: k8s-dashboard-basic-auth
          nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
          #nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
          #nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
          #kubernetes.io/ingress.class: nginx-sso
        hosts:
        - grafana{{ .Environment.Values.hostSuffix }}
      dashboardProviders:
        dashboardproviders.yaml:
          apiVersion: 1
          providers:
          - name: 'default'
            orgId: 1
            folder: imported
            type: file
            disableDeletion: false
            editable: true
            options:
              path: /var/lib/grafana/dashboards/default

      #dashboards:
      #  default:
      #    performance:
      #      url: https://raw.githubusercontent.com/Telecominfraproject/wlan-testing/master/grafana/performance.json
      #    qaDebugging:
      #      url: https://raw.githubusercontent.com/Telecominfraproject/wlan-cloud-ucentral-analytics/main/grafana/qa-debugging.json

      datasources:
       datasources.yaml:
        apiVersion: 1
        datasources:
        - name: Prometheus
          type: prometheus
          url: http://prometheus-operated:9090
          access: proxy
          isDefault: false
# Not doing influx and ES anymore (for now)
#        - name: InfluxDB
#          type: influxdb
#          access: proxy
#          url: https://influx.cicd{{ .Environment.Values.hostSuffix }}
#          user: tip
#          secureJsonData:
#            token: {{ .Environment.Values.influxdb.adminUser.token }}
#            password: {{ .Environment.Values.influxdb.adminUser.password }}
#          jsonData:
#            version: Flux
#            organization: tip
#            defaultBucket: tip-cicd
#        - name: ES
#          type: elasticsearch
#          access: proxy
#          url: http://elasticsearch-client.monitoring.svc:9200
#          database: logstash-*
#          isDefault: false
#          jsonData:
#            esVersion: 6
#            timeField: '@timestamp'
#            logMessageField: message
#            logLevelField: fields.level

  - alertmanager:
      config:
        global:
          resolve_timeout: 1m
          slack_api_url: {{ .Environment.Values.alertmanager.slack_api_url }}
        route:
          receiver: "null"
          routes:
          - match:
              area: testbed
            receiver: "{{ .Environment.Values.monitoring.receiver }}"

        receivers:
        - name: "null"
        - name: "slack-notifications"
          slack_configs:
          - channel: "#open-wifi-testbed-alerts"
            send_resolved: true
            icon_url: https://avatars3.githubusercontent.com/u/3380462
            title: |-
            {{- readFile "alertmanager-templates/title.tpl" | nindent 14 }}
            text: >-
            {{- readFile "alertmanager-templates/text.tpl" | nindent 14 }}
            title_link: ""

#  - kube-state-metrics:
#      image:
#        repository: tip-tip-wlan-cloud-ucentral.jfrog.io/kube-state-metrics
#        tag: v2.6.0-tip20221103

- name: prometheus-operator-helper
  condition: prometheus.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/standalone-monitoring
  labels:
    role: setup
    group: monitoring
    app: prometheus-operator
    sub: helper
  values:
  - monitoring:
      namespace: {{ .Environment.Values.monitoring.namespace }}
      domain: {{ .Environment.Values.domain }}
  - proxy:
      namespace: kube-system

#- name: prometheus-operator-ingress-auth
#  condition: prometheus.enabled
#  namespace: kube-system
#  chart: charts/sso
#  labels:
#    role: setup
#    group: monitoring
#    app: prometheus-operator
#    sub: oAuth
#  values:
#  - monitoring:
#      domain: example.com
#      namespace: {{ .Environment.Values.monitoring.namespace }}
#  - oidc:
#      issuerUrl: {{ .Environment.Values.sso.oidc.issuerUrl }}
#      clientId: {{ .Environment.Values.sso.oidc.clientId }}
#      clientSecret: {{ .Environment.Values.sso.oidc.clientSecret }}
#      cookieSecret: {{ .Environment.Values.sso.oidc.cookieSecret }}

- name: fluentd
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: kokuwa/fluentd-elasticsearch
  version: 13.1.0
  labels:
    role: setup
    group: monitoring
    app: fluentd
  values:
  - elasticsearch:
      serviceAccount:
        create: true
      hosts:
      - elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local
  - resources:
      requests:
        cpu: 100m
        memory: 200Mi
      limits:
        cpu: 500m
        memory: 300Mi
  - tolerations:
    - effect: NoSchedule
      operator: Exists

- name: elasticsearch
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/elasticsearch
  labels:
    role: setup
    group: monitoring
    app: elasticsearch
  values:
  - image:
      repository: "docker.elastic.co/elasticsearch/elasticsearch"
      tag: "6.8.22"
  - client:
      resources:
        limits:
          memory: 3Gi
        requests:
          memory: 3Gi
      heapSize: "1500m"

  - master:
      resources:
        limits:
          cpu: 200m
          memory: 1000Mi
        requests:
          cpu: 200m
          memory: 800Mi

  - data:
      persistence:
        size: 650Gi
      resources:
        limits:
          cpu: 3
          memory: 4Gi
        requests:
          cpu: 1500m
          memory: 4Gi
      heapSize: "3000m"
      readinessProbe: null

- name: elasticsearch-curator
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: stable/elasticsearch-curator
  labels:
    role: setup
    group: monitoring
    app: elasticsearch
  values:
  - configMaps:
      config_yml: |-
        client:
          hosts:
            -  http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
      action_file_yml: |-
        actions:
          1:
            action: delete_indices
            description: "Delete old indices"
            options:
              ignore_empty_list: True
              continue_if_exception: True
              timeout_override: 300
            filters:
            - filtertype: pattern
              kind: prefix
              value: 'logstash-'
            - filtertype: age
              source: name
              direction: older
              timestring: '%Y.%m.%d'
              unit: days
              unit_count: 30

- name: kibana
  condition: kibana.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/kibana
  labels:
    role: setup
    group: monitoring
    app: kibana
  values:
  - testFramework:
      enabled: false
  - image:
      repository: "docker.elastic.co/kibana/kibana"
      tag: "6.8.6"
  - files:
      kibana.yml:
        elasticsearch.hosts: http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
  - ingress:
      enabled: true
      annotations:
        nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
        nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
        kubernetes.io/ingress.class: nginx-sso
      hosts:
      - kibana{{ .Environment.Values.hostSuffix }}
  - lifecycle:
      postStart:
        exec:
          command:
          - bash
          - -c
          - |
            #!/bin/bash
            # Config the index_pattern
            TEMPLATE_NAME="logstash"
            INDEX_PATTERN="logstash-*"
            KIBANA_URL=http://localhost:5601

            # Wait until service is ready
            while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $KIBANA_URL/app/kibana)" != "200" ]]; do sleep 1; done

            # Apply default Index Pattern into Kibana
            curl -X POST -v $KIBANA_URL/api/saved_objects/index-pattern/$TEMPLATE_NAME \
            -H 'kbn-xsrf: true' -H 'Content-Type: application/json' \
            -d '{"attributes": {"title": "'$INDEX_PATTERN'"}}'
  - dashboardImport:
      enabled: true
      timeout: 60
      basePath: ""
      dashboards:
        k8s-container-logs: |
        {{- readFile "kibana-dashboards/k8s-container-logs.json" | nindent 10 }}

- name: logstash
  condition: logstash.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: elastic/logstash
  version: 7.16.2
  labels:
    role: setup
    group: monitoring
    app: logstash
  values:
  - image: docker.elastic.co/logstash/logstash
  - replicas: 1
  - resources:
      limits:
        memory: 2Gi
      requests:
        memory: 2Gi
  - logstashConfig:
      logstash.yml: |
        http.host: 0.0.0.0
        log.level: warn
  - logstashPipeline:
      logstash.conf: "" # override default pipeline
      ucentral-kafka.conf: |
        input {
          kafka {
            bootstrap_servers => "kafka-headless.openwifi-dev01.svc.cluster.local:9092"
            topics => ["state", "healthcheck", "device_event_queue"]
            tags => ["openwifi-kafka"]
            #codec => json
            decorate_events => true
            auto_offset_reset => "latest"
            client_id => "openwifi-dev01"
            add_field => { "instance" => "openwifi-dev01" }
          }

          kafka {
            bootstrap_servers => "kafka-headless.openwifi-qa01.svc.cluster.local:9092"
            topics => ["state", "healthcheck", "device_event_queue"]
            tags => ["openwifi-kafka"]
            decorate_events => true
            auto_offset_reset => "latest"
            client_id => "openwifi-qa01"
            add_field => { "instance" => "openwifi-qa01" }
          }

          kafka {
            bootstrap_servers => "kafka-headless.openwifi-demo.svc.cluster.local:9092"
            topics => ["state", "healthcheck", "device_event_queue"]
            tags => ["openwifi-kafka"]
            #codec => json
            decorate_events => true
            auto_offset_reset => "latest"
            client_id => "openwifi-demo"
            add_field => { "instance" => "openwifi-demo" }
          }
        }

        filter {
          if "openwifi-kafka" in [tags] {
            mutate { copy => { "[@metadata][kafka]" => "kafka" } }
          }
        }

        filter {
          if "openwifi-kafka" in [tags] {
            json {
              source => "message"
              remove_field => [ "message" ]
            }
          }
        }

        filter {
          if ([payload][state][unit][load]) {
            mutate {
              add_field => { "[payload][state][unit][load1]" => "%{[payload][state][unit][load][0]}" }
              add_field => { "[payload][state][unit][load5]" => "%{[payload][state][unit][load][1]}" }
              add_field => { "[payload][state][unit][load15]" => "%{[payload][state][unit][load][2]}" }
              remove_field => [ "[payload][state][unit][load]" ]
            }

            mutate {
              convert => {
                "[payload][state][unit][load1]" => "integer"
                "[payload][state][unit][load5]" => "integer"
                "[payload][state][unit][load15]" => "integer"
              }
            }
          }
        }

        output {
          if "openwifi-kafka" in [tags] {
            elasticsearch {
              hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
              index => "logstash-ucentral-%{+YYYY.MM.dd}"
            }
          }
        }

      ucentral-ap-firmware.conf: |
        input {
          s3 {
            access_key_id => "{{ .Environment.Values.ucentral_ap_firmware_logstash.access_key_id }}"
            secret_access_key => "{{ .Environment.Values.ucentral_ap_firmware_logstash.secret_access_key }}"
            bucket => "ucentral-ap-firmware-logs"
            delete => true
            region => "us-east-1"
            tags => ["ucentral-ap-firmware"]
            additional_settings => {
              force_path_style => true
              follow_redirects => false
            }
          }
        }

        filter {
          if "ucentral-ap-firmware" in [tags] {
            grok {
              match => { "message" => "%{S3_ACCESS_LOG}" }
              remove_field => ["message"]
            }

            date {
              match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
              remove_field => ["timestamp"]
            }

            geoip {
              source => "clientip"
            }

            if [operation] == "REST.GET.OBJECT" and [key] =~ /^*\.bin$/ {
              grok {
                match => { "key" => "%{UPLOAD_DATE:upload_date}-%{AP_MODEL:ap_model}-%{GREEDYDATA:branch_name}-%{GIT_REV:git_rev}-%{WORD:suffix}.%{WORD:file_extension}" }
                pattern_definitions => {
                 "UPLOAD_DATE" => "%{YEAR}%{MONTHNUM}%{MONTHDAY}"
                 "AP_MODEL" => "[a-z0-9]+_[a-z0-9]+(-|_)?[a-z0-9]+(-|_)?[a-z0-9]+"
                 "GIT_REV" => "[a-z0-9]{4,8}" 
                }
                add_field => { "timestamp_clientip" => "%{@timestamp} %{clientip}" }
              }
            }
          }
        }

        output {
          if "ucentral-ap-firmware" in [tags] {
            elasticsearch {
              hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
              index => "logstash-ucentral-ap-firmware-%{+YYYY.MM.dd}"
            }
          }
        }

      syslog.conf: |
        input {
          syslog {
            tags => ["ucentral-syslog"]
            port => 5514
            grok_pattern => "(?:<%{POSINT:priority}>%{SYSLOGLINE}|%{MONTH} %{MONTHDAY} %{TIME} %{DATA:docker.container_name}/%{DATA:github.run_number}\[%{INT:undefined_number}\]: %{GREEDYDATA:message})"
          }
        }

        filter {
          if ([undefined_number]) {
            mutate {
              remove_field => [ "undefined_number" ]
            }
          }
        }

        output {
          if "ucentral-syslog" in [tags] {
            elasticsearch {
              hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
              index => "logstash-%{+YYYY.MM.dd}"
              document_type => "_doc"
            }
          }
        }
  - service:
      type: LoadBalancer
      annotations:
        service.beta.kubernetes.io/aws-load-balancer-type: "nlb-ip"
        service.beta.kubernetes.io/aws-load-balancer-internal: "true"
        service.beta.kubernetes.io/aws-load-balancer-private-ipv4-addresses: "10.10.10.40,10.10.11.40,10.10.12.40"
      ports:
        - name: syslog
          port: 514
          targetPort: 5514
          protocol: TCP

- name: simple-auth-roles
  condition: ingress.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/k8s-dashboard-roles
  labels:
    role: setup
    group: system
    app: ingress

- name: k8s-dashboard
  condition: k8s-dashboard.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: kubernetes-dashboard/kubernetes-dashboard
  version: 5.0.5
  labels:
    role: setup
    group: monitoring
    app: k8s-dashboard
  values:
  - settings:
      defaultNamespace: {{ index .Environment.Values.monitoring.publicNamespaces 0 }}
      namespaceFallbackList: {{ .Environment.Values.monitoring.publicNamespaces | toJson }}
      itemsPerPage: 25
      clusterName: {{ .Environment.Values.monitoring.clusterName }}
  - extraArgs:
    - --enable-skip-login
    - --system-banner=Welcome to the {{ .Environment.Values.monitoring.clusterName}} Kubernetes cluster. If you are missing your namespace in the above select box, please <a href="https://telecominfraproject.atlassian.net/browse/WIFI">create a ticket</a>.
  - rbac:
      create: false
      clusterRoleMetrics: true
      clusterReadOnlyRole: false
  - service:
      type: NodePort
      externalPort: 80
  - protocolHttp: true
  - ingress:
      className: nginx-sso
      enabled: true
      paths:
        - /
        - /*
      annotations:
        #nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
        #nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
        nginx.ingress.kubernetes.io/auth-type: basic
        nginx.ingress.kubernetes.io/auth-secret: k8s-dashboard-basic-auth
        nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
      hosts:
        - k8s-dashboard{{ .Environment.Values.hostSuffix }}

- name: metrics-server
  condition: metrics-server.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: bitnami/metrics-server
  version: 6.2.4
  labels:
    role: setup
    group: monitoring
    app: metrics-server
  values:
  - apiService:
      create: true

- name: aws-load-balancer-controller
  <<: *default
  condition: alb-ingress.enabled
  chart: eks/aws-load-balancer-controller
  version: 1.6.2
  labels:
    role: setup
    group: system
    app: aws-load-balancer-controller
  values:
  - serviceAccount:
      create: false
      name: {{ .Values.eks.clusterName }}-alb-ingress-sa
      annotations:
        eks.amazonaws.com/role-arn: arn:aws:iam::{{ .Values.eks.accountID }}:role/{{ .Values.eks.clusterName }}-alb-ingress
    clusterName: {{ .Values.eks.clusterName }}
    enableShield: false
    enableWaf: false
    enableWafv2: false
    logLevel: info

- name: aws-node-termination-handler
  <<: *default
  condition: node-termination-handler.enabled
  chart: eks/aws-node-termination-handler
  version: 0.21.0
  labels:
    role: setup
    group: system
    app: node-termination-handler
  values:
    - deleteLocalData: true
    - ignoreDaemonSets: true
    - podTerminationGracePeriod: -1 # use values defined in Pod

- name: influxdb
  namespace: test-bss
  chart: influxdata/influxdb2
  version: 2.0.3
  condition: influxdb.enabled
  labels:
    role: setup
    group: load-testing
    app: influxdb
    task: tools-133
  values:
    - image:
        tag: 2.0.6-alpine
    - adminUser:
        organization: tip
        bucket: tip-cicd
        user: tip
        password: {{ .Environment.Values.influxdb.adminUser.password }}
        token: {{ .Environment.Values.influxdb.adminUser.token }}
        retention_policy: "0s"
    - persistence:
        storageClass: gp2
        size: 10Gi
    - service:
        type: NodePort
    - resources:
        limits:
         cpu: 500m
         memory: 500Mi
        requests:
         cpu: 500m
         memory: 500Mi
    - ingress:
        enabled: true
        annotations:
          kubernetes.io/ingress.class: alb
          alb.ingress.kubernetes.io/scheme: internet-facing
          alb.ingress.kubernetes.io/group.name: test-bss-load-testing
          alb.ingress.kubernetes.io/certificate-arn: {{ .Environment.Values.eks.certificateARNCICD }}
          alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
          alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'
          alb.ingress.kubernetes.io/healthcheck-path: /health
          external-dns.alpha.kubernetes.io/hostname: influx.cicd{{ .Environment.Values.hostSuffix }}
        hostname: influx.cicd{{ .Environment.Values.hostSuffix }}
        path: "/*"

- name: actions-runner-controller
  condition: actions-runner-controller.enabled
  namespace: actions-runner-controller
  chart: actions-runner-controller/actions-runner-controller
  version: 0.19.1
  labels:
    app: actions-runner-controller
  values:
  - authSecret:
      create: true
      github_token: {{ .Environment.Values.actions_runner_controller.github_token }}
  - image:
      pullPolicy: IfNotPresent
  - githubWebhookServer:
      enabled: true
      secret:
        github_webhook_secret_token: {{ .Environment.Values.actions_runner_controller.webhook_secret }}
      service:
        type: NodePort
      ingress:
        enabled: true
        hosts:
        - host: "ghac-webhook.cicd.lab.wlan.tip.build"
          paths:
          - path: /*
            pathType: ImplementationSpecific
        annotations:
          kubernetes.io/ingress.class: alb
          alb.ingress.kubernetes.io/scheme: internet-facing
          alb.ingress.kubernetes.io/group.name: wlan-cicd
          alb.ingress.kubernetes.io/certificate-arn: {{ .Environment.Values.eks.certificateARNCICD }}
          alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
          alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'

- name: actions-runner-controller-addon
  condition: actions-runner-controller.enabled
  namespace: actions-runner-controller
  chart: charts/actions-runner-controller-addon
  labels:
    app: actions-runner-controller
  disableValidation: true

- name: cert-manager
  condition: cert-manager.enabled
  namespace: cert-manager
  chart: jetstack/cert-manager
  version: v1.13.0
  labels:
    app: cert-manager
  values:
  - installCRDs: true
  - webhook:
      resources:
        requests:
          cpu: 500m
          memory: 150Mi
        limits:
          cpu: 500m
          memory: 150Mi

- name: calico
  condition: calico.enabled
  chart: projectcalico/tigera-operator
  version: v3.26.1
  namespace: tigera-operator
  disableValidation: true
  labels:
    app: calico
  values:
    - installation:
        kubernetesProvider: EKS

- name: github-actions-network-policies
  #condition: calico.enabled
  condition: actions-runner-controller.enabled
  namespace: actions-runner-controller
  chart: charts/github-actions-network-policies
  labels:
    role: setup
    group: networking
    app: github-actions-network-policies
    sub1: calico
    sub2: actions-runner-controller-addon

- name: elasticsearch-exporter
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: braedon/prometheus-es-exporter
  version: 0.2.0
  labels:
    group: monitoring
    app: elasticsearch-exporter
  values:
    - image:
        tag: 0.14.1
    - elasticsearch:
        cluster: elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
        queries: |-
          [DEFAULT]
          QueryIntervalSecs = 15
          QueryTimeoutSecs = 10
          QueryIndices = _all
          QueryOnError = drop
          QueryOnMissing = drop
          [query_assert_violation_errors]
          QueryOnError = preserve
          QueryOnMissing = zero
          QueryJson = {
              "query": {
                "bool": {
                  "filter": {
                    "bool": {
                      "must": [
                        {
                          "match": {
                            "kubernetes.namespace_name.keyword": "openwifi-qa01"
                          }
                        },
                        {
                          "match": {
                            "kubernetes.container_name.keyword": "owgw"
                          }
                        }
                      ]
                    }
                  },
                  "must": {
                    "match": {
                      "message": "Assertion violation: !_pStream"
                    }
                  }
                }
              },
              "aggs": {
                "agg_terms_kubernetes.container_name.keyword": {
                  "terms": {
                    "field": "kubernetes.container_name.keyword"
                  }
                }
              }
            }

- name: core-dump-handler
  condition: core-dump-handler.enabled
  chart: core-dump-handler/core-dump-handler
  version: v8.6.0
  namespace: ibm-observe
  labels:
    app: core-dump-handler
  values:
    - daemonset:
        s3BucketName: openwifi-core-dumps
        s3AccessKey: {{ .Environment.Values.core_dump_handler.s3_access_key }}
        s3Secret: {{ .Environment.Values.core_dump_handler.s3_secret }}
        s3Region: us-east-1
        includeCrioExe: true
        vendor: rhel7