
Commit

Merge branch 'stackhpc/2023.1' into logs_in_grafana
dougszumski authored Jul 10, 2024
2 parents 2d6f175 + 4c7b7c9 commit ba3fc8a
Showing 122 changed files with 51,929 additions and 50,713 deletions.
2 changes: 1 addition & 1 deletion .automation
@@ -32,4 +32,4 @@ max_microversion = 3.70
build_timeout = 600

[dashboard]
dashboard_url = http://192.168.39.2
dashboard_url = https://192.168.39.2
47 changes: 44 additions & 3 deletions .github/workflows/stackhpc-all-in-one.yml
@@ -167,7 +167,7 @@ jobs:
VM_NETWORK: ${{ inputs.vm_network }}
VM_SUBNET: ${{ inputs.vm_subnet }}
VM_INTERFACE: ${{ inputs.vm_interface }}
VM_VOLUME_SIZE: ${{ inputs.upgrade && '45' || '35' }}
VM_VOLUME_SIZE: ${{ inputs.upgrade && '50' || '40' }}
VM_TAGS: '["skc-ci-aio", "PR=${{ github.event.number }}"]'

- name: Terraform Plan
@@ -179,6 +179,7 @@ jobs:
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}

- name: Terraform Apply
id: tf_apply
run: |
for attempt in $(seq 5); do
if terraform apply -auto-approve; then
@@ -355,6 +356,7 @@ jobs:
if: inputs.upgrade

- name: Tempest tests
id: tempest
run: |
mkdir -p tempest-artifacts
docker run -t --rm \
@@ -366,16 +368,55 @@
env:
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}

- name: StackHPC OpenStack tests
id: stackhpc-openstack-tests
continue-on-error: true
run: |
mkdir -p sot-results
docker run -t --rm \
-v $(pwd):/stack/kayobe-automation-env/src/kayobe-config \
-v $(pwd)/sot-results:/stack/sot-results \
-e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
$KAYOBE_IMAGE \
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/stackhpc-openstack-tests.yml'
env:
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}

- name: Collect diagnostic information
id: diagnostics
run: |
mkdir -p diagnostics
sudo -E docker run -t --rm \
-v $(pwd):/stack/kayobe-automation-env/src/kayobe-config \
-v $(pwd)/diagnostics:/stack/diagnostics \
-e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
$KAYOBE_IMAGE \
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/diagnostics.yml'
env:
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}
if: ${{ !cancelled() && steps.tf_apply.outcome == 'success' }}

- name: Upload test result artifacts
uses: actions/upload-artifact@v4
with:
name: tempest-results-${{ inputs.os_distribution }}-${{ inputs.os_release }}-${{ inputs.neutron_plugin }}${{ inputs.upgrade && '-upgrade' }}
path: tempest-artifacts/*
name: test-results-${{ inputs.os_distribution }}-${{ inputs.os_release }}-${{ inputs.neutron_plugin }}${{ inputs.upgrade && '-upgrade' || '' }}
path: |
diagnostics/
tempest-artifacts/
sot-results/
if: ${{ !cancelled() && (steps.tempest.outcome == 'success' || steps.stackhpc-openstack-tests.outcome == 'success' || steps.diagnostics.outcome == 'success') }}

- name: Fail if any Tempest tests failed
run: |
test $(wc -l < tempest-artifacts/failed-tests) -lt 1
- name: Fail if any StackHPC OpenStack tests failed
run: |
echo "Some StackHPC OpenStack tests failed."
echo "See HTML results artifact (sot-results) for details."
exit 1
if: steps.stackhpc-openstack-tests.outcome == 'failure'

- name: Destroy
run: terraform destroy -auto-approve
working-directory: ${{ github.workspace }}/terraform/aio
35 changes: 26 additions & 9 deletions .github/workflows/stackhpc-container-image-build.yml
@@ -34,11 +34,10 @@ on:
required: false
default: true
push-dirty:
description: Push scanned images that have vulnerabilities?
description: Push scanned images that have critical vulnerabilities?
type: boolean
required: false
# NOTE(Alex-Welsh): This default should be flipped once we resolve existing failures
default: true
default: false

env:
ANSIBLE_FORCE_COLOR: True
@@ -136,6 +135,10 @@ jobs:
run: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.49.0
- name: Install yq
run: |
curl -sL https://github.com/mikefarah/yq/releases/download/v4.42.1/yq_linux_amd64.tar.gz | tar xz && sudo mv yq_linux_amd64 /usr/bin/yq
- name: Install Kayobe
run: |
mkdir -p venvs &&
@@ -149,7 +152,7 @@
# Normally installed during host configure.
- name: Install Docker Python SDK
run: |
sudo pip install docker
sudo pip install docker 'requests<2.32.0'
- name: Get Kolla tag
id: write-kolla-tag
@@ -176,7 +179,7 @@
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Create build logs output directory
run: mkdir image-build-logs
run: mkdir image-build-logs

- name: Build kolla overcloud images
id: build_overcloud_images
@@ -235,9 +238,16 @@
run: cp image-build-logs/image-scan-output/clean-images.txt image-build-logs/push-attempt-images.txt
if: inputs.push

# NOTE(seunghun1ee): This always appends dirty images with CVEs severity lower than critical.
# This should be reverted when it's decided to filter high level CVEs as well.
- name: Append dirty images to push list
run: |
cat image-build-logs/image-scan-output/dirty-images.txt >> image-build-logs/push-attempt-images.txt
if: ${{ inputs.push }}

- name: Append images with critical vulnerabilities to push list
run: |
cat image-build-logs/image-scan-output/critical-images.txt >> image-build-logs/push-attempt-images.txt
if: ${{ inputs.push && inputs.push-dirty }}

- name: Push images
@@ -249,11 +259,11 @@
while read -r image; do
# Retries!
for i in {1..5}; do
for i in {1..5}; do
if docker push $image; then
echo "Pushed $image"
break
elif $i == 5; then
elif [ $i -eq 5 ] ; then
echo "Failed to push $image"
echo $image >> image-build-logs/push-failed-images.txt
else
@@ -283,8 +293,15 @@
run: if [ $(wc -l < image-build-logs/push-failed-images.txt) -gt 0 ]; then cat image-build-logs/push-failed-images.txt && exit 1; fi
if: ${{ !cancelled() }}

- name: Fail when images failed scanning
run: if [ $(wc -l < image-build-logs/dirty-images.txt) -gt 0 ]; then cat image-build-logs/dirty-images.txt && exit 1; fi
# NOTE(seunghun1ee): Currently we want to mark the job fail only when critical CVEs are detected.
# This can be used again instead of "Fail when critical vulnerabilities are found" when it's
# decided to fail the job on detecting high CVEs as well.
# - name: Fail when images failed scanning
# run: if [ $(wc -l < image-build-logs/image-scan-output/dirty-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/dirty-images.txt && exit 1; fi
# if: ${{ !inputs.push-dirty && !cancelled() }}

- name: Fail when critical vulnerabilities are found
run: if [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/critical-images.txt && exit 1; fi
if: ${{ !inputs.push-dirty && !cancelled() }}

# NOTE(mgoddard): Trigger another CI workflow in the
35 changes: 27 additions & 8 deletions doc/source/configuration/cephadm.rst
@@ -1,9 +1,9 @@
================
Cephadm & Kayobe
================
====
Ceph
====

This section describes how to use the Cephadm integration included in StackHPC
Kayobe configuration since Xena to deploy Ceph.
Kayobe configuration to deploy Ceph.

The Cephadm integration takes the form of custom playbooks that wrap
around the Ansible `stackhpc.cephadm collection
@@ -19,10 +19,10 @@ create or modify Ceph cluster deployments. Supported features are:
Resources
=========

- https://docs.ceph.com/en/pacific/cephadm/index.html
- https://docs.ceph.com/en/pacific/
- https://docs.ceph.com/en/quincy/cephadm/index.html
- https://docs.ceph.com/en/quincy/
- https://docs.ceph.com/en/reef/cephadm/index.html
- https://docs.ceph.com/en/reef/
- https://github.com/stackhpc/ansible-collection-cephadm

Configuration
@@ -107,7 +107,7 @@ OSD specification
~~~~~~~~~~~~~~~~~

The following example is a basic OSD spec that adds OSDs for all
available disks:
available disks with encryption at rest:

.. code:: yaml
@@ -118,9 +118,10 @@ available disks:
host_pattern: "*"
data_devices:
all: true
encrypted: true

More information about OSD service placement is available
`here <https://docs.ceph.com/en/pacific/cephadm/services/osd/#advanced-osd-service-specifications>`__.
`here <https://docs.ceph.com/en/quincy/cephadm/services/osd/#advanced-osd-service-specifications>`__.

Container image
~~~~~~~~~~~~~~~
@@ -264,6 +265,24 @@ post-deployment configuration is applied. Commands in the
``cephadm_commands_post`` list are executed after the rest of the Ceph
post-deployment configuration is applied.

Messenger v2 encryption in transit
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Messenger v2 is the default on-wire protocol since the Nautilus release. It
supports `encryption of data in transit
<https://docs.ceph.com/en/quincy/rados/configuration/msgr2/#connection-mode-configuration-options>`_,
but this is not used by default. It may be enabled as follows:

.. code:: yaml
# A list of commands to pass to cephadm shell -- ceph. See stackhpc.cephadm.commands
# for format.
cephadm_commands_pre:
# Enable messenger v2 encryption in transit.
- "config set global ms_cluster_mode secure"
- "config set global ms_service_mode secure"
- "config set global ms_client_mode secure"
Manila & CephFS
~~~~~~~~~~~~~~~

141 changes: 141 additions & 0 deletions doc/source/configuration/cloudkitty.rst
@@ -0,0 +1,141 @@
==========
CloudKitty
==========

Configuring in kayobe-config
============================

By default, CloudKitty uses Gnocchi and Ceilometer as the collector and fetcher
backends. Unless the system has a specific reason not to, we recommend instead
using Prometheus as the backend for both. The following instructions explain
how to do this. Also, see the `Kolla Ansible docs on CloudKitty
<https://docs.openstack.org/kolla-ansible/latest/reference/rating/cloudkitty-guide.html>`__
for more details.

Enable CloudKitty and disable InfluxDB, as we are using OpenSearch as the
storage backend. Set the following in ``kolla.yml``:

.. code-block:: yaml
kolla_enable_cloudkitty: true
# Explicitly disable influxdb as we are using OpenSearch as the CloudKitty backend
kolla_enable_influxdb: false

Set Prometheus as the backend for both the collector and fetcher, and
Elasticsearch as the storage backend. Note that our fork of CloudKitty is
patched so that the CloudKitty Elasticsearch V2 storage backend will also work
with an OpenSearch cluster. Proper support for the V2 OpenSearch storage
backend is still pending in Kolla-Ansible `here
<https://review.opendev.org/c/openstack/kolla-ansible/+/898555>`__. Set the
following in ``kolla/globals.yml``:

.. code-block:: yaml
cloudkitty_collector_backend: prometheus
cloudkitty_fetcher_backend: prometheus
cloudkitty_storage_backend: elasticsearch

If you have TLS enabled, you will also need to set the cafile for Prometheus
and Elasticsearch. Set the following in ``kolla/globals.yml``.

.. code-block::
{% raw %}
cloudkitty_prometheus_cafile: "{{ openstack_cacert }}"
cloudkitty_elasticsearch_cafile: "{{ openstack_cacert }}"
{% endraw %}

The default collection period is one hour, which is likely too long for most
systems as CloudKitty charges by the **entire** collection period if any usage
is seen within this timeframe. This is regardless of actual usage, meaning that
even one minute will be charged as a full hour's usage. As a result, it is
recommended to adjust the collection interval, ``period`` (in units of
seconds), appropriately (e.g. ten minutes). Furthermore, when using Prometheus
as the collector, you need to change the ``scope_key`` to match the metrics
provided by the Prometheus OpenStack Exporter. Both of these can be achieved by
setting the following in ``kolla/config/cloudkitty.conf``:

.. code-block:: console
[collect]
scope_key = tenant_id
period = 600

You will need to configure which metrics CloudKitty should track. The following
example, set in ``kolla/config/cloudkitty/metrics.yml``, will track VM flavors and
the total utilised volume.

.. code-block:: yaml
metrics:
openstack_nova_server_status:
alt_name: instance
groupby:
- uuid
- user_id
- tenant_id
metadata:
- flavor_id
- name
mutate: MAP
mutate_map:
0.0: 1.0 # ACTIVE
11.0: 1.0 # SHUTOFF
12.0: 1.0 # SUSPENDED
16.0: 1.0 # PAUSED
unit: instance
openstack_cinder_limits_volume_used_gb:
alt_name: storage
unit: GiB
groupby:
- tenant_id

If your system had Monasca deployed in the past, you likely have some
relabelled attributes in the Prometheus OpenStack exporter. To account for
this, you should either remove the custom relabelling (in
``kolla/config/prometheus.yml``) or change your ``metrics.yml`` to use the
correct attributes.
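
For illustration only, the sketch below shows the kind of leftover relabelling
to look for; the job and label names are assumptions rather than values taken
from a real deployment. If you find something similar, either delete it or
point ``scope_key`` and the ``groupby``/``metadata`` entries in ``metrics.yml``
at the renamed label (``project_id`` in this example):

.. code-block:: yaml

   # Hypothetical Monasca-era rename in kolla/config/prometheus.yml. With this in
   # place, the exporter's metrics carry project_id rather than tenant_id, so the
   # CloudKitty configuration above would not match anything.
   scrape_configs:
     - job_name: openstack_exporter  # assumed job name
       metric_relabel_configs:
         - source_labels: [tenant_id]
           target_label: project_id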

Post-configuration with openstack-config
========================================

This is an example `openstack-config
<https://github.com/stackhpc/openstack-config>`__ setup to create mappings for
the metrics configured above. Note that the costs are scaled for the ten minute
collection period, e.g. a flavor with 1 VCPU will cost 1 unit per hour.

.. code-block:: yaml
# Map flavors based on VCPUs
openstack_ratings_hashmap_field_mappings:
- service: instance
name: flavor_id
mappings:
- value: '1' # tiny compute flavor (1 vcpu) with an OpenStack flavor ID of 1
cost: 0.1666666666666666
type: flat
- value: '2' # small compute flavor (2 vcpus) with an OpenStack flavor ID of 2
cost: 0.3333333333333333
type: flat
- value: '3' # medium compute flavor (3 vcpus) with an OpenStack flavor ID of 3
cost: 0.5
type: flat
- value: '4' # large compute flavor (4 vcpus) with an OpenStack flavor ID of 4
cost: 0.6666666666666666
type: flat
- value: '5' # xlarge compute flavor (8 vcpus) with an OpenStack flavor ID of 5
cost: 1.3333333333333333
type: flat
- value: '6' # tiny 2 compute flavor (2 vcpus) with an OpenStack flavor ID of 6
cost: 0.3333333333333333
type: flat
# Map volumes based on GB
openstack_ratings_hashmap_service_mappings:
- service: storage
cost: 0.16666666666666666
type: flat
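
For reference, the numbers above follow directly from the ten minute period:
with six collection periods per hour, a flavor charged at ``N`` units per hour
is mapped to ``N / 6`` units per period, so the 1 VCPU flavor costs
1/6 ≈ 0.1667 per period and the 8 VCPU flavor costs 8/6 ≈ 1.3333 per period.
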
See the `OpenStack CloudKitty Ratings role
<https://github.com/stackhpc/ansible-collection-openstack/tree/main/roles/os_ratings>`__
for more details.
