Merge branch 'main' into time-sharding

grafana · Nov 6, 2024 · 36e0cfc · 36e0cfc
2 parents 7787735 + 7669385
commit 36e0cfc
Show file tree

Hide file tree

Showing 26 changed files with 955 additions and 563 deletions.
diff --git a/.github/workflows/operator-publish-operator-hub.yml b/.github/workflows/operator-publish-operator-hub.yml
@@ -10,10 +10,16 @@ jobs:
     with:
       org: redhat-openshift-ecosystem
       repo: community-operators-prod
+    secrets:
+      APP_ID: ${{ secrets.APP_ID }}
+      APP_PRIVATE_KEY: ${{ secrets.APP_PRIVATE_KEY }}
 
   operator-hub-community-release:
     if: startsWith(github.event.release.tag_name, 'operator/')
     uses: ./.github/workflows/operator-reusable-hub-release.yml
     with:
       org: k8s-operatorhub
       repo: community-operators
+    secrets:
+      APP_ID: ${{ secrets.APP_ID }}
+      APP_PRIVATE_KEY: ${{ secrets.APP_PRIVATE_KEY }}
diff --git a/.github/workflows/operator-release-please.yml b/.github/workflows/operator-release-please.yml
@@ -17,6 +17,9 @@ jobs:
     outputs:
       release_created: ${{ steps.release.outputs.operator--release_created }}
       release_name: ${{ steps.release.outputs.operator--tag_name }}
+      release_major: ${{ steps.release.outputs.operator--major }}
+      release_minor: ${{ steps.release.outputs.operator--minor }}
+      release_patch: ${{ steps.release.outputs.operator--patch }}
     steps:
       - id: "get_github_app_token"
         name: Get GitHub App Token
@@ -53,4 +56,32 @@ jobs:
           GH_TOKEN: ${{ steps.get_github_app_token.outputs.token }}
         working-directory: "release"
         run: |
-          gh release edit "${{ needs.releasePlease.outputs.release_name }}" --draft=false --latest=false
+          gh release edit "${{ needs.releasePlease.outputs.release_name }}" --draft=false --latest=false
+  # Builds the multi-platform operator image and pushes it to Docker Hub,
+  # tagged with the major.minor.patch version output by the releasePlease job.
+  publishImages:
+    env:
+      BUILD_TIMEOUT: 60
+      IMAGE_PREFIX: "grafana"
+    needs:
+      # releasePlease is listed directly because the `needs` context only
+      # exposes outputs of direct dependencies, not transitive ones.
+      - "releasePlease"
+      - "publishRelease"
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Set up QEMU"
+        uses: "docker/setup-qemu-action@v3"
+      - name: "Set up docker buildx"
+        uses: "docker/setup-buildx-action@v3"
+      - name: "Login to DockerHub (from vault)"
+        uses: "grafana/shared-workflows/actions/dockerhub-login@main"
+      - name: "Build and push"
+        timeout-minutes: "${{ env.BUILD_TIMEOUT }}"
+        uses: "docker/build-push-action@v6"
+        with:
+          context: "operator"
+          file: "Dockerfile"
+          platforms: "linux/amd64,linux/arm64,linux/arm"
+          push: true
+          tags: "${{ env.IMAGE_PREFIX }}/loki-operator:${{ needs.releasePlease.outputs.release_major }}.${{ needs.releasePlease.outputs.release_minor }}.${{ needs.releasePlease.outputs.release_patch }}"
diff --git a/clients/pkg/logentry/stages/extensions_test.go b/clients/pkg/logentry/stages/extensions_test.go
@@ -122,7 +122,7 @@ func TestCRI_tags(t *testing.T) {
 			},
 			expected: []string{
 				"partial line 1 log finished",     // belongs to stream `{foo="bar"}`
-				"partial line 2 another full log", // belongs to stream `{foo="bar2"}
+				"partial line 2 another full log", // belongs to stream `{foo="bar2"}`
 			},
 		},
 		{

diff --git a/docs/sources/alert/_index.md b/docs/sources/alert/_index.md
@@ -33,7 +33,6 @@ ruler:
     kvstore:
       store: inmemory
   enable_api: true
-
 ```
 
 We support two kinds of rules: [alerting](#alerting-rules) rules and [recording](#recording-rules) rules.
@@ -62,9 +61,9 @@ groups:
             > 0.05
         for: 10m
         labels:
-            severity: page
+          severity: page
         annotations:
-            summary: High request latency
+          summary: High request latency
   - name: credentials_leak
     rules:
       - alert: http-credentials-leaked
@@ -106,7 +105,6 @@ This query (`expr`) will be executed every 1 minute (`interval`), the result of
 name we have defined (`record`). This metric named `nginx:requests:rate1m` can now be sent to Prometheus, where it will be stored
 just like any other metric.
 
-
 ### Limiting Alerts and Recording Rule Samples
 
 Like [Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/#limiting-alerts-and-series), you can configure a limit for alerts produced by alerting rules and samples produced by recording rules. This limit can be configured per-group. Using limits can prevent a faulty rule from generating a large number of alerts or recording samples. When the limit is exceeded, all recording samples produced by the rule are discarded, and if it is an alerting rule, all alerts for the rule, active, pending, or inactive, are cleared. The event will be recorded as an error in the evaluation, and the rule health will be set to `err`. The default value for limit is `0` meaning no limit.
@@ -115,8 +113,6 @@ Like [Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/rec
 
 Here is an example of a rule group along with its limit configured.
 
-
-
 ```yaml
 groups:
   - name: production_rules
@@ -131,9 +127,9 @@ groups:
             > 0.05
         for: 10m
         labels:
-            severity: page
+          severity: page
         annotations:
-            summary: High request latency
+          summary: High request latency
       - record: nginx:requests:rate1m
         expr: |
           sum(
@@ -184,6 +180,7 @@ We don't always control the source code of applications we run. Load balancers a
 ### Event alerting
 
 Sometimes you want to know whether _any_ instance of something has occurred. Alerting based on logs can be a great way to handle this, such as finding examples of leaked authentication credentials:
+
 ```yaml
 - name: credentials_leak
   rules:
@@ -209,6 +206,7 @@ As an example, we can use LogQL v2 to help Loki to monitor _itself_, alerting us
 ## Interacting with the Ruler
 
 ### Lokitool
+
 Because the rule files are identical to Prometheus rule files, we can interact with the Loki Ruler via `lokitool`.
 
 {{< admonition type="note" >}}
@@ -284,6 +282,28 @@ resource "loki_rule_group_recording" "test" {
 
 ```
 
+### Cortex rules action
+
+The [Cortex rules action](https://github.com/grafana/cortex-rules-action) introduced Loki as a backend which can be handy for managing rules in a CI/CD pipeline. It can be used to lint, diff, and sync rules between a local directory and a remote Loki instance.
+
+```yaml
+- name: Lint Loki rules
+  uses: grafana/cortex-rules-action@master
+  env:
+    ACTION: check
+    RULES_DIR: <source_dir_of_rules> # Example: logs/recording_rules/,logs/alerts/
+    BACKEND: loki
+
+- name: Deploy rules to Loki staging
+  uses: grafana/cortex-rules-action@master
+  env:
+    CORTEX_ADDRESS: <loki_ingress_addr>
+    CORTEX_TENANT_ID: fake
+    ACTION: sync
+    RULES_DIR: <source_dir_of_rules> # Example: logs/recording_rules/,logs/alerts/
+    BACKEND: loki
+```
+
 ## Scheduling and best practices
 
 One option to scale the Ruler is by scaling it horizontally. However, with multiple Ruler instances running they will need to coordinate to determine which instance will evaluate which rule. Similar to the ingesters, the Rulers establish a hash ring to divide up the responsibilities of evaluating rules.
@@ -294,19 +314,19 @@ A full sharding-enabled Ruler example is:
 
 ```yaml
 ruler:
-    alertmanager_url: <alertmanager_endpoint>
-    enable_alertmanager_v2: true
-    enable_api: true
-    enable_sharding: true
-    ring:
-        kvstore:
-            consul:
-                host: consul.loki-dev.svc.cluster.local:8500
-            store: consul
-    rule_path: /tmp/rules
-    storage:
-        gcs:
-            bucket_name: <loki-rules-bucket>
+  alertmanager_url: <alertmanager_endpoint>
+  enable_alertmanager_v2: true # true by default since Loki 3.2.0
+  enable_api: true
+  enable_sharding: true
+  ring:
+    kvstore:
+      consul:
+        host: consul.loki-dev.svc.cluster.local:8500
+      store: consul
+  rule_path: /tmp/rules
+  storage:
+    gcs:
+      bucket_name: <loki-rules-bucket>
 ```
 
 ## Ruler storage
@@ -316,18 +336,25 @@ The Ruler supports the following types of storage: `azure`, `gcs`, `s3`, `swift`
 The local implementation reads the rule files off of the local filesystem. This is a read-only backend that does not support the creation and deletion of rules through the [Ruler API](https://grafana.com/docs/loki/<LOKI_VERSION>/reference/loki-http-api#ruler). Despite the fact that it reads the local filesystem this method can still be used in a sharded Ruler configuration if the operator takes care to load the same rules to every Ruler. For instance, this could be accomplished by mounting a [Kubernetes ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) onto every Ruler pod.
 
 A typical local configuration might look something like:
+
 ```
   -ruler.storage.type=local
   -ruler.storage.local.directory=/tmp/loki/rules
 ```
 
 With the above configuration, the Ruler would expect the following layout:
+
 ```
 /tmp/loki/rules/<tenant id>/rules1.yaml
                            /rules2.yaml
 ```
+
 Yaml files are expected to be [Prometheus-compatible](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) but include LogQL expressions as specified in the beginning of this doc.
 
+## Remote rule evaluation
+
+With larger deployments and complex rules, running a ruler in local evaluation mode causes problems where results could be inconsistent or incomplete compared to what you see in Grafana. To solve this, use the remote evaluation mode to evaluate rules against the query frontend. A more detailed explanation can be found in the [scalability documentation](https://grafana.com/docs/loki/<LOKI_VERSION>/operations/scalability/#remote-rule-evaluation).
+
 ## Future improvements
 
 There are a few things coming to increase the robustness of this service. In no particular order:

diff --git a/docs/sources/shared/configuration.md b/docs/sources/shared/configuration.md
@@ -3336,6 +3336,11 @@ The `limits_config` block configures global and per-tenant limits in Loki. The v
 # CLI flag: -validation.discover-log-levels
 [discover_log_levels: <boolean> | default = true]
 
+# Field name to use for log levels. If not set, log level would be detected
+# based on pre-defined labels as mentioned above.
+# CLI flag: -validation.log-level-fields
+[log_level_fields: <list of strings> | default = [level LEVEL Level Severity severity SEVERITY lvl LVL Lvl]]
+
 # When true an ingester takes into account only the streams that it owns
 # according to the ring while applying the stream limit.
 # CLI flag: -ingester.use-owned-stream-count

diff --git a/go.mod b/go.mod
@@ -277,7 +277,7 @@ require (
 	github.com/go-playground/validator/v10 v10.19.0 // indirect
 	github.com/go-zookeeper/zk v1.0.3 // indirect
 	github.com/gofrs/flock v0.8.1 // indirect
-	github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
+	github.com/golang-jwt/jwt/v4 v4.5.1 // indirect
 	github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/google/btree v1.1.3 // indirect

diff --git a/go.sum b/go.sum
@@ -1563,8 +1563,9 @@ github.com/gogo/status v1.1.1/go.mod h1:jpG3dM5QPcqu19Hg8lkUhBFBa3TcLs1DG7+2Jqci
 github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A=
 github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
 github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
-github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
 github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
+github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo=
+github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
 github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk=
 github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=

diff --git a/operator/docs/operator/release.md b/operator/docs/operator/release.md
@@ -15,9 +15,10 @@ toc: true
 
 This document will go over the design of the release process for the Loki Operator and how to release it.
 
-# Design
+## Design
 
 To release Loki Operator we need the following:
+
 1. Bump the Loki Operator version and generate the bundle manifests with `make bundle-all`;
 2. Update the CHANGELOG.md with the new version;
 3. Create a release tag and a release on GitHub;
@@ -27,50 +28,53 @@ Loki Operator uses the GitHub [action release-please](https://github.com/google-
 
 In the following sections, we will go over how the workflows are configured.
 
-## release-please
+### release-please
 
 release-please automates CHANGELOG generation, the creation of GitHub releases, and version bumps. It does so by parsing the git history, looking for Conventional Commit messages, and creating release PRs. Once a release PR is merged release-please will create the release and it will again wait for a releasable unit before opening the next release PR. A releasable unit is a commit with one of the following prefixes: "feat", "fix", and "deps".
 
 The workflow that is responsible for the operator release-please is `.github/workflows/operator-release-please.yml`. Note that the operator release-please process is different from the one used by Loki. The operator release-please configuration lives in `operator/release-please-config.json`.
 
 Useful links:
+
 - release-please [customizing releases documentation](https://github.com/googleapis/release-please/blob/main/docs/customizing.md)
 - release-please [config documentation](https://github.com/googleapis/release-please/blob/main/docs/manifest-releaser.md#configfile)
 
 The following sub-section contains some notes on the Loki operator release-please configuration:
+
 - Use of `bump-minor-pre-major` and `bump-patch-for-minor-pre-major`;
 - Use of `draft`;
 - Preventing merging the release-please PR without updating the manifests;
 
-### Use of `bump-minor-pre-major` and `bump-patch-for-minor-pre-major` 
+#### Use of `bump-minor-pre-major` and `bump-patch-for-minor-pre-major` 
 
 Since the operator is still pre `v1.0.0` we are leveraging `bump-minor-pre-major` and `bump-patch-for-minor-pre-major` so that merging "feat", "fix", and "deps" commits will only bump a patch version and merging "feat!" and "fix!" will bump the minor version.
 
 As of writing, the operator release-please will only act on merges to `main`. This means that we can support the following release scenarios:
+
 - Case 1: Release a patch version of v0.Y.x+1 with the diff from v0.Y.x. This is only supported until a breaking feature gets merged to `main`.
 - Case 2: Release a new minor version v0.Y+1.0 with the diff from v0.Y.x
 
-### Use of `draft`
+#### Use of `draft`
 
 Since the operator shares the same repo with Loki, we want to make sure that, when we create a release of the operator, we don't set that release to `latest`; otherwise it would look like the latest release from the operator was Loki's latest release. Unfortunately, release-please doesn't provide a way to disable this, so instead we enable `draft`. `draft` makes it so releases created by release-please are only created as drafts. We then use a step that will publish the release without setting it to the latest.
 
-### Preventing merging the release-please PR without updating the manifests
+#### Preventing merging the release-please PR without updating the manifests
 
 Since step 1. is currently not automated and disconnected from release-please, we have put in place a workflow in `.github/workflows/operator-check-prepare-release-commit.yml` that runs on release-please PRs. This workflow is responsible for making sure that a commit with the message `chore(operator): prepare community release v$VERSION` exists in master. Once we automate step 1. we should be able to remove this workflow.
 
-## Publish release to operatorhubs
+### Publish release to operatorhubs
 
 To publish a community release of Loki Operator to the community hubs, we leverage the workflow in `.github/workflows/operator-publish-operator-hub.yml`. This workflow is set to trigger on tag creation that matches `operator/`.
 
 This workflow will then use a workflow `.github/workflows/operator-reusable-hub-release.yml` that's responsible for:
+
 - Creating on the folder `operators/loki-operator/` a new folder with the manifests for the new version;
 - Adding the ocp supported version annotation to the `metadata.yaml` file only in the OpenShift community repo;
 - Creating a PR for the appropriate community repo.
 
-# Releasing
+## Releasing
 
-1. Create a PR to bump the version (i.e https://github.com/grafana/loki/pull/12246), be careful with the commit message;
-2. Re-triggering the action `operator-publish-operator-hub` on the release-please PR;
-3. Merging the release-please PR (i.e TBD );
+1. Create a PR to bump the version (e.g. [v0.6.1 preparation PR](https://github.com/grafana/loki/pull/13105)), be careful with the commit message;
+2. Re-trigger the action `operator-publish-operator-hub` on the release-please PR;
+3. Merge the release-please PR (e.g. [v0.6.1 release PR](https://github.com/grafana/loki/pull/12593));
 4. Grafana bot will automatically open PRs to [k8s-operatorhub/community-operators](https://github.com/k8s-operatorhub/community-operators) and [redhat-openshift-ecosystem/community-operators-prod](https://github.com/redhat-openshift-ecosystem/community-operators-prod);
-
diff --git a/operator/internal/manifests/openshift/otlp.go b/operator/internal/manifests/openshift/otlp.go
@@ -86,6 +86,7 @@ func DefaultOTLPAttributes(disableRecommended bool) config.OTLPAttributeConfig {
 				"k8s.event.user_agent",
 				"k8s.user.groups",
 				"k8s.user.username",
+				"level",
 				"log.iostream",
 			},
 		},

diff --git a/operator/release-please-config.json b/operator/release-please-config.json
@@ -1,5 +1,4 @@
 {
-    "bootstrap-sha": "d4353fa63d9283a941b10b6c90537901e557a9f1",
     "bump-minor-pre-major": true,
     "bump-patch-for-minor-pre-major": true,
     "include-component-in-tag": true,