Skip to content

Commit

Permalink
Telemetry roadmap config (apollographql#4061)
Browse files Browse the repository at this point in the history
This PR creates a config structure that will set us up for a better
telemetry future.

Documentation is currently in progress.

The existing configuration has grown organically, and now that
inconsistencies have been removed we can look to adding new
features.

This PR contains a feature gate: `telemetry_next` that must be enabled
to get the new config.

Run the following to generate the schema for exploration:
```
cargo run --features telemetry_next -- --schema > schema.json
```

The goals of this config are:

* Follow the Otel specs. There are plenty of standard attributes and
instruments defined as part of the spec.
* Custom events and instruments.
* Standardize access to data at the relevant service.
* Consistency across metrics/tracing/logging.


Example config:

```yaml
telemetry:

  logging:
    common:
      service_name: router
    stdout:
      enabled: false
      format: bunyan
    file:
      enabled: false
      format: bunyan

  tracing:
    common: # Renamed from trace_config
      max_attributes_per_event: 128
      max_attributes_per_span: 128
      max_attributes_per_link: 128
      max_events_per_span: 128
      max_links_per_span: 128
      parent_based_sampler: true
      sampler: always_on
      service_name: router
      service_namespace: "default"
      resource:
        d: e

      # Resources are otel config not represented in the yaml config


    propagation:
      baggage: false
      jaeger: false
      datadog: false
      request:
        header_name: "X-REQUEST-ID"
      trace_context: false
      zipkin: false

    otlp:
      enabled: true
      endpoint: "http://localhost:4317/v1/traces"




  metrics:
    common:
      service_namespace: "default"
      service_name: router
      buckets:
        - 0.1

      resource:
        test: foo
    prometheus:
      enabled: true
      path: /metrics
    otlp:
      enabled: true


  instruments:
    default_attribute_requirement_level: required
    router:
      http.server.active_requests: true
      my_instrument:
        value: unit
        type: counter
        unit: kb
        description: "my description"
        event: on_error
        attributes:
          http.response.status_code: false
          "my_attribute":
            response_header: "X-MY-HEADER"
            default: "unknown"
            redact: "foo"

    supergraph:
      my_instrument:
        value: unit
        event: on_error
        type: counter
        unit: kb
        description: "my description"
    subgraph:
      my_instrument:
        value: unit
        event: on_error
        type: counter
        unit: kb
        description: "my description"


  events:
    router:
      request: true
      response: false
      error: false
      test:
        message: "foo"
        level: info
        attributes:
          http.response.body.size: false


  spans:
    default_attribute_requirement_level: required
    legacy_request_span: true
    # The request span will be disappearing
    # router is the new root span
    router:
      attributes:
        dd.trace_id: false
        http.request.body.size: false
        http.response.body.size: false
        http.request.method: false
        http.request.method.original: false
        http.response.status_code: false
        network.protocol.name: false
        network.protocol.version: false
        network.transport: false
        error.type: false
        network.type: false
        trace_id: false
        user_agent.original: false
        client.address: false
        client.port: false
        http.route: false
        network.local.address: false
        network.local.port: false
        network.peer.address: false
        network.peer.port: false
        server.address: false
        server.port: false
        url.path: false
        url.query: false
        url.scheme: false
        "x-custom1":
          trace_id: datadog
        "x-custom2":

          response_header: "X-CUSTOM2"

          default: "unknown"
        "x-custom3":
          request_header: "X-CUSTOM3"
        "x-custom5":
          response_context: "X-CUSTOM3"
        "x-custom8":
          env: "ENV_VAR"

      #etc...
    supergraph:
      attributes:
        graphql.document: false
        graphql.operation.name: true
        graphql.operation.type: true

        "x-custom":
          query_variable: "arg1"
          default: "unknown"
          redact: ""
        "x-custom2":
          response_body: "arg2"
        "x-custom4":
          request_context: "X-CUSTOM3"
        "x-custom5":
          response_context: "X-CUSTOM3"
        "x-custom6":
          operation_name: string
        "x-custom7":
          operation_name: hash
        "x-custom8":
          env: "ENV_VAR"
      #etc...
    subgraph:

      attributes:

        graphql.federation.subgraph.name: false
        graphql.operation.name: false
        graphql.operation.type: true
        "x-custom":
          subgraph_operation_name: string
          default: "unknown"
        "x-custom2":
          subgraph_response_body: "arg2"
        "x-custom4":
          request_context: "X-CUSTOM3"
        "x-custom5":
          response_context: "X-CUSTOM3"
```

<!-- start metadata -->
---

**Checklist**

Complete the checklist (and note appropriate exceptions) before the PR
is marked ready-for-review.

- [ ] Changes are compatible[^1]
- [ ] Documentation[^2] completed
- [ ] Performance impact assessed and acceptable
- Tests added and passing[^3]
    - [ ] Unit Tests
    - [ ] Integration Tests
    - [ ] Manual Tests

**Exceptions**

*Note any exceptions here*

**Notes**

[^1]: It may be appropriate to bring upcoming changes to the attention
of other (impacted) groups. Please endeavour to do this before seeking
PR approval. The mechanism for doing this will vary considerably, so use
your judgement as to how and when to do this.
[^2]: Configuration is an important part of many changes. Where
applicable please try to document configuration examples.
[^3]: Tick whichever testing boxes are applicable. If you are adding
Manual Tests, please document the manual testing (extensively) in the
Exceptions.

---------

Co-authored-by: bryn <[email protected]>
  • Loading branch information
BrynCooke and bryn authored Oct 25, 2023
1 parent c5a6e40 commit d506c4b
Show file tree
Hide file tree
Showing 9 changed files with 1,114 additions and 0 deletions.
4 changes: 4 additions & 0 deletions apollo-router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ console = ["tokio/tracing", "console-subscriber"]
# See https://github.com/apollographql/federation-rs/pull/185
docs_rs = ["router-bridge/docs_rs"]

# Enables the use of new telemetry features that are under development
# and not yet ready for production use.
telemetry_next = []

[package.metadata.docs.rs]
features = ["docs_rs"]

Expand Down
15 changes: 15 additions & 0 deletions apollo-router/src/plugins/telemetry/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,27 @@ pub(crate) struct Conf {
/// Logging configuration
#[serde(rename = "experimental_logging", default)]
pub(crate) logging: Logging,

#[cfg(feature = "telemetry_next")]
#[serde(rename = "logging", default)]
#[allow(dead_code)]
pub(crate) new_logging: config_new::logging::Logging,
/// Metrics configuration
pub(crate) metrics: Metrics,
/// Tracing configuration
pub(crate) tracing: Tracing,
/// Apollo reporting configuration
pub(crate) apollo: apollo::Config,

#[cfg(feature = "telemetry_next")]
/// Event configuration
pub(crate) events: config_new::events::Events,
#[cfg(feature = "telemetry_next")]
/// Span configuration
pub(crate) spans: config_new::spans::Spans,
#[cfg(feature = "telemetry_next")]
/// Instrument configuration
pub(crate) instruments: config_new::instruments::Instruments,
}

/// Metrics configuration
Expand Down
Loading

0 comments on commit d506c4b

Please sign in to comment.