diff --git a/.golangci.yml b/.golangci.yml index a46841f56..208a73c41 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -63,6 +63,7 @@ linters-settings: # List of allowed packages. allow: - $gostd + - github.com/armon/go-metrics - github.com/BurntSushi/toml - github.com/Masterminds/sprig/v3 - github.com/davecgh/go-spew/spew @@ -84,10 +85,11 @@ linters-settings: - github.com/mitchellh/hashstructure - github.com/mitchellh/mapstructure - github.com/pkg/errors + - github.com/prometheus/client_golang/prometheus - github.com/stretchr/testify/assert - github.com/stretchr/testify/require - github.com/coreos/go-systemd run: timeout: 10m - concurrency: 4 \ No newline at end of file + concurrency: 4 diff --git a/README.md b/README.md index 8efe66d58..680e66b58 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ this functionality might prove useful. - [Multiple Commands](#multiple-commands) - [Multi-phase Execution](#multi-phase-execution) - [Debugging](#debugging) +- [Telemetry](#telemetry) - [FAQ](#faq) - [Contributing](#contributing) @@ -407,6 +408,71 @@ $ consul-template -log-level debug ... # ... ``` +## Telemetry + +Consul Template uses the [armon/go-metrics](https://github.com/armon/go-metrics) library to implement the Consul Template metrics system. It currently supports metrics exported to circonus API, statsd server, statsite server, dogstatsd server, and prometheus endpoint. + +### Key Metrics + +These metrics offer insight into Consul Template and capture subprocess activities. The number of dependencies are aggregated from the configured templates, and metrics are collected around a dependency when it is updated from source. This is useful to correlate any upstream changes to downstream actions originating from Consul Template. + +Metrics are monitored around template rendering and execution of template commands. 
These +metrics indicate the rendering status of a template and how long the commands for a template take, +to provide insight into the performance of the templates. + +| Metric Name | Labels | Description | +|-|:-:|-| +| `consul-template.dependencies_received` | type=(consul\|vault\|local), id=dependencyString | A counter of dependencies received from monitoring value changes | +| `consul-template.templates_rendered` | id=templateID, status=(rendered\|would\|quiescence) | A counter of templates rendered | +| `consul-template.runner_actions` | action=(start\|stop\|run) | A count of runner actions | +| `consul-template.commands_exec` | status=(success\|error) | The number of commands executed after rendering templates | + +#### Metrics yet to be implemented + +The current metrics were implemented using the [previous metric-related PR](https://github.com/hashicorp/consul-template/pull/1378/files#diff-d980d9aed26114a3414812b58d45770a201c1f29b7f67ddc0ef0891a8f1b7736) as a reference, but as the `armon/go-metrics` library doesn't implement all types of metrics yet, histogram metrics could not be implemented.
+ +Said metrics are described as below: + +| Metric Name | Labels | Description | +|-|:-:|-| +| `consul-template.dependencies` | type=(consul\|vault\|local) | The number of dependencies grouped by types | +| `consul-template.templates` | | The number of templates configured | +| `consul-template.commands_exec_time` | id=tmplDestination | The execution time (seconds) of a template command | + + +### Metric Samples + +#### DogStatsD + +``` +2020-05-05 11:57:46.143979 consul-template.runner_actions:1|c|#action:start +consul-template.runner_actions:2|c|#action:run +consul-template.dependencies_received:1|c|#id:kv.block(hello),type:consul +consul-template.templates_rendered:1|c|#id:aadcafd7f28f1d9fc5e76ab2e029f844,status:rendered +consul-template.commands_exec:1|c|#status:success +consul-template.commands_exec:0|c|#status:error +``` + +#### Prometheus + +``` +$ curl localhost:8888/metrics +# HELP consul_template_commands_exec The number of commands executed with labels status=(success|error) +# TYPE consul_template_commands_exec counter +consul_template_commands_exec{status="error"} 0 +consul_template_commands_exec{status="success"} 1 +# HELP consul_template_dependencies_received A counter of dependencies received with labels type=(consul|vault|local) and id=dependencyString +# TYPE consul_template_dependencies_received counter +consul_template_dependencies_received{id="kv.block(hello)",type="consul"} 1 +# HELP consul_template_runner_actions A count of runner actions with labels action=(start|stop|run) +# TYPE consul_template_runner_actions counter +consul_template_runner_actions{action="run"} 2 +consul_template_runner_actions{action="start"} 1 +# HELP consul_template_templates_rendered A counter of templates rendered with labels id=templateID and status=(rendered|would|quiescence) +# TYPE consul_template_templates_rendered counter +consul_template_templates_rendered{id="aadcafd7f28f1d9fc5e76ab2e029f844",status="rendered"} 1 +``` + ## FAQ **Q: How is this different than 
confd?**
diff --git a/cli.go b/cli.go index 8aeb3692e..1b15fe859 100644 --- a/cli.go +++ b/cli.go @@ -20,6 +20,7 @@ import ( "github.com/hashicorp/consul-template/manager" "github.com/hashicorp/consul-template/service_os" "github.com/hashicorp/consul-template/signals" + "github.com/hashicorp/consul-template/telemetry" "github.com/hashicorp/consul-template/version" ) @@ -132,6 +133,13 @@ func (cli *CLI) Run(args []string) int { } }() + // Initialize telemetry + tel, err := telemetry.Init(config.Telemetry) + if err != nil { + return logError(err, ExitCodeConfigError) + } + defer tel.Stop() + // Initial runner runner, err := manager.NewRunner(config, dry) if err != nil { diff --git a/config/config.go b/config/config.go index c8109a7da..b8d9f9ae2 100644 --- a/config/config.go +++ b/config/config.go @@ -82,6 +82,9 @@ type Config struct { // Syslog is the configuration for syslog. Syslog *SyslogConfig `mapstructure:"syslog"` + // Telemetry is the configuration for collecting and emitting telemetry. + Telemetry *TelemetryConfig `mapstructure:"telemetry"` + // Templates is the list of templates. 
Templates *TemplateConfigs `mapstructure:"template"` @@ -174,6 +177,10 @@ func (c *Config) Copy() *Config { o.Syslog = c.Syslog.Copy() } + if c.Telemetry != nil { + o.Telemetry = c.Telemetry.Copy() + } + if c.Templates != nil { o.Templates = c.Templates.Copy() } @@ -265,6 +272,10 @@ func (c *Config) Merge(o *Config) *Config { r.Syslog = r.Syslog.Merge(o.Syslog) } + if o.Telemetry != nil { + r.Telemetry = r.Telemetry.Merge(o.Telemetry) + } + if o.Templates != nil { r.Templates = r.Templates.Merge(o.Templates) } @@ -336,6 +347,7 @@ func Parse(s string) (*Config, error) { "nomad.transport", "ssl", "syslog", + "telemetry", "vault", "vault.retry", "vault.ssl", @@ -494,6 +506,7 @@ func (c *Config) GoString() string { "ReloadSignal:%s, "+ "FileLog:%#v, "+ "Syslog:%#v, "+ + "Telemetry:%#v, "+ "Templates:%#v, "+ "TemplateErrFatal:%#v"+ "Vault:%#v, "+ @@ -513,6 +526,7 @@ func (c *Config) GoString() string { SignalGoString(c.ReloadSignal), c.FileLog, c.Syslog, + c.Telemetry.GoString(), c.Templates, c.TemplateErrFatal, c.Vault, @@ -561,6 +575,7 @@ func DefaultConfig() *Config { FileLog: DefaultLogFileConfig(), Nomad: DefaultNomadConfig(), Syslog: DefaultSyslogConfig(), + Telemetry: DefaultTelemetryConfig(), Templates: DefaultTemplateConfigs(), Vault: DefaultVaultConfig(), Wait: DefaultWaitConfig(), @@ -634,6 +649,11 @@ func (c *Config) Finalize() { } c.Syslog.Finalize() + if c.Telemetry == nil { + c.Telemetry = DefaultTelemetryConfig() + } + c.Telemetry.Finalize() + if c.Templates == nil { c.Templates = DefaultTemplateConfigs() } diff --git a/config/syslog.go b/config/syslog.go index dcc1280d9..963ce645d 100644 --- a/config/syslog.go +++ b/config/syslog.go @@ -99,7 +99,7 @@ func (c *SyslogConfig) GoString() string { return fmt.Sprintf("&SyslogConfig{"+ "Enabled:%s, "+ - "Facility:%s"+ + "Facility:%s, "+ "Name:%s"+ "}", BoolGoString(c.Enabled), diff --git a/config/telemetry.go b/config/telemetry.go new file mode 100644 index 000000000..9dc32f43c --- /dev/null +++ 
const (
	// defaultMetricsPrefix is the MetricsPrefix applied by Finalize when the
	// user did not configure one.
	defaultMetricsPrefix = "consul_template"
)

// TelemetryConfig holds the settings of the `telemetry` configuration block,
// which describe where and how go-metrics data is emitted.
//
// The layout is modelled on Consul's telemetry configuration:
// https://github.com/hashicorp/consul/blob/main/lib/telemetry.go#L29
//
// Circonus*: see https://github.com/circonus-labs/circonus-gometrics for more
// details on the various configuration options. Valid combinations:
//   - CirconusAPIToken
//     metric management enabled (search for existing check or create a new one)
//   - CirconusSubmissionURL
//     metric management disabled (use check with specified submission_url,
//     broker must be using a public SSL certificate)
//   - CirconusAPIToken + CirconusSubmissionURL
//     metric management enabled (use check with specified submission_url)
//   - CirconusAPIToken + CirconusCheckID
//     metric management enabled (use check with specified id)
type TelemetryConfig struct {
	// Disable turns telemetry off when set to true.
	//
	// NOTE(review): unlike every other field this one carries no
	// json/mapstructure tag - confirm that is intentional.
	Disable bool

	// CirconusAPIApp is an app name associated with API token.
	// Default: "consul"
	//
	// hcl: telemetry { circonus_api_app = string }
	CirconusAPIApp string `json:"circonus_api_app,omitempty" mapstructure:"circonus_api_app"`

	// CirconusAPIToken is a valid API Token used to create/manage check. If
	// provided, metric management is enabled. It is a secret: GoString never
	// prints its value.
	// Default: none
	//
	// hcl: telemetry { circonus_api_token = string }
	CirconusAPIToken string `json:"circonus_api_token,omitempty" mapstructure:"circonus_api_token"`

	// CirconusAPIURL is the base URL to use for contacting the Circonus API.
	// Default: "https://api.circonus.com/v2"
	//
	// hcl: telemetry { circonus_api_url = string }
	CirconusAPIURL string `json:"circonus_api_url,omitempty" mapstructure:"circonus_api_url"`

	// CirconusBrokerID is an explicit broker to use when creating a new check.
	// The numeric portion of broker._cid. If metric management is enabled and
	// neither a Submission URL nor Check ID is provided, an attempt will be
	// made to search for an existing check using Instance ID and Search Tag.
	// If one is not found, a new HTTPTRAP check will be created.
	// Default: use Select Tag if provided, otherwise, a random Enterprise
	// Broker associated with the specified API token or the default Circonus
	// Broker.
	//
	// hcl: telemetry { circonus_broker_id = string }
	CirconusBrokerID string `json:"circonus_broker_id,omitempty" mapstructure:"circonus_broker_id"`

	// CirconusBrokerSelectTag is a special tag which will be used to select a
	// broker when a Broker ID is not provided. The best use of this is as a
	// hint for which broker should be used based on *where* this particular
	// instance is running (e.g. a specific geo location or datacenter, dc:sfo).
	// Default: none
	//
	// hcl: telemetry { circonus_broker_select_tag = string }
	CirconusBrokerSelectTag string `json:"circonus_broker_select_tag,omitempty" mapstructure:"circonus_broker_select_tag"`

	// CirconusCheckDisplayName is the name for the check which will be
	// displayed in the Circonus UI.
	// Default: value of CirconusCheckInstanceID
	//
	// hcl: telemetry { circonus_check_display_name = string }
	CirconusCheckDisplayName string `json:"circonus_check_display_name,omitempty" mapstructure:"circonus_check_display_name"`

	// CirconusCheckForceMetricActivation will force enabling metrics, as they
	// are encountered, if the metric already exists and is NOT active. If
	// check management is enabled, the default behavior is to add new metrics
	// as they are encountered. If the metric already exists in the check, it
	// will *NOT* be activated. This setting overrides that behavior.
	// Default: "false"
	//
	// hcl: telemetry { circonus_check_force_metric_activation = (true|false) }
	CirconusCheckForceMetricActivation bool `json:"circonus_check_force_metric_activation,omitempty" mapstructure:"circonus_check_force_metric_activation"`

	// CirconusCheckID is the check id (not check bundle id) from a previously
	// created HTTPTRAP check. The numeric portion of the check._cid field.
	// Default: none
	//
	// hcl: telemetry { circonus_check_id = string }
	CirconusCheckID string `json:"circonus_check_id,omitempty" mapstructure:"circonus_check_id"`

	// CirconusCheckInstanceID serves to uniquely identify the metrics coming
	// from this "instance". It can be used to maintain metric continuity with
	// transient or ephemeral instances as they move around within an
	// infrastructure.
	// Default: hostname:app
	//
	// hcl: telemetry { circonus_check_instance_id = string }
	CirconusCheckInstanceID string `json:"circonus_check_instance_id,omitempty" mapstructure:"circonus_check_instance_id"`

	// CirconusCheckSearchTag is a special tag which, when coupled with the
	// instance id, helps to narrow down the search results when neither a
	// Submission URL nor Check ID is provided.
	// Default: service:app (e.g. service:consul)
	//
	// hcl: telemetry { circonus_check_search_tag = string }
	CirconusCheckSearchTag string `json:"circonus_check_search_tag,omitempty" mapstructure:"circonus_check_search_tag"`

	// CirconusCheckTags holds the tags to apply to the check, passed through
	// as a single string.
	// NOTE(review): Consul documents this as a comma separated list - confirm
	// the same format applies here.
	//
	// hcl: telemetry { circonus_check_tags = string }
	CirconusCheckTags string `json:"circonus_check_tags,omitempty" mapstructure:"circonus_check_tags"`

	// CirconusSubmissionInterval is the interval at which metrics are
	// submitted to Circonus. It is kept as a string, not a time.Duration.
	// Default: 10s
	//
	// hcl: telemetry { circonus_submission_interval = "duration" }
	CirconusSubmissionInterval string `json:"circonus_submission_interval,omitempty" mapstructure:"circonus_submission_interval"`

	// CirconusSubmissionURL is the check.config.submission_url field from a
	// previously created HTTPTRAP check.
	// Default: none
	//
	// hcl: telemetry { circonus_submission_url = string }
	CirconusSubmissionURL string `json:"circonus_submission_url,omitempty" mapstructure:"circonus_submission_url"`

	// DisableHostname will disable hostname prefixing for all metrics.
	//
	// hcl: telemetry { disable_hostname = (true|false) }
	DisableHostname bool `json:"disable_hostname,omitempty" mapstructure:"disable_hostname"`

	// DogstatsdAddr is the address of a dogstatsd instance. If provided,
	// metrics will be sent to that instance.
	//
	// hcl: telemetry { dogstatsd_addr = string }
	DogstatsdAddr string `json:"dogstatsd_addr,omitempty" mapstructure:"dogstatsd_addr"`

	// DogstatsdTags are the global tags that should be sent with each packet
	// to dogstatsd. It is a list of strings, where each string looks like
	// "my_tag_name:my_tag_value".
	//
	// hcl: telemetry { dogstatsd_tags = []string }
	DogstatsdTags []string `json:"dogstatsd_tags,omitempty" mapstructure:"dogstatsd_tags"`

	// FilterDefault is the default for whether to allow a metric that's not
	// covered by the filter.
	//
	// hcl: telemetry { filter_default = (true|false) }
	FilterDefault bool `json:"filter_default,omitempty" mapstructure:"filter_default"`

	// AllowedPrefixes is a list of metric name prefixes to allow. Finalize
	// adds MetricsPrefix to this list.
	//
	// hcl: telemetry { allowed_prefixes = ["", "", ...] }
	AllowedPrefixes []string `json:"allowed_prefixes,omitempty" mapstructure:"allowed_prefixes"`

	// BlockedPrefixes is a list of metric name prefixes to block.
	//
	// hcl: telemetry { blocked_prefixes = ["", "", ...] }
	BlockedPrefixes []string `json:"blocked_prefixes,omitempty" mapstructure:"blocked_prefixes"`

	// MetricsPrefix is the prefix used to write stats values to.
	// Default: "consul_template" (applied by Finalize)
	//
	// hcl: telemetry { metrics_prefix = string }
	MetricsPrefix string `json:"metrics_prefix,omitempty" mapstructure:"metrics_prefix"`

	// StatsdAddr is the address of a statsd instance. If provided, metrics
	// will be sent to that instance.
	//
	// hcl: telemetry { statsd_address = string }
	StatsdAddr string `json:"statsd_address,omitempty" mapstructure:"statsd_address"`

	// StatsiteAddr is the address of a statsite instance. If provided,
	// metrics will be streamed to that instance.
	//
	// hcl: telemetry { statsite_address = string }
	StatsiteAddr string `json:"statsite_address,omitempty" mapstructure:"statsite_address"`

	// PrometheusRetentionTime is the time before a prometheus metric expires.
	// Default: 60s (applied by Finalize)
	//
	// hcl: telemetry { prometheus_retention_time = "duration" }
	PrometheusRetentionTime time.Duration `json:"prometheus_retention_time,omitempty" mapstructure:"prometheus_retention_time"`

	// PrometheusPort is the REST port under which the metrics can be queried.
	//
	// hcl: telemetry { prometheus_port = int }
	PrometheusPort int `json:"prometheus_port,omitempty" mapstructure:"prometheus_port"`
}

// DefaultTelemetryConfig returns an empty telemetry configuration. Defaults
// that depend on user input are filled in later by Finalize.
func DefaultTelemetryConfig() *TelemetryConfig {
	return &TelemetryConfig{}
}

// Copy returns a deep copy of this configuration, or nil when the receiver is
// nil. Slice fields get their own backing arrays so that mutating the copy
// (for example through Finalize) can never affect the original.
func (c *TelemetryConfig) Copy() *TelemetryConfig {
	if c == nil {
		return nil
	}

	// clone duplicates a string slice while keeping nil as nil, so a copy
	// stays deeply equal to its source.
	clone := func(in []string) []string {
		if in == nil {
			return nil
		}
		out := make([]string, len(in))
		copy(out, in)
		return out
	}

	// A struct assignment copies every scalar field; only the slices need
	// explicit treatment.
	o := *c
	o.DogstatsdTags = clone(c.DogstatsdTags)
	o.AllowedPrefixes = clone(c.AllowedPrefixes)
	o.BlockedPrefixes = clone(c.BlockedPrefixes)
	return &o
}

// Merge combines all values in this configuration with the values in the other
// configuration, with values in the other configuration taking precedence.
//
// Strings, slices and numbers are only overwritten when the other value is
// non-zero; a non-empty slice replaces (it is not appended to) the current
// one. Booleans are plain bools and cannot express "unset", so the other
// configuration's boolean values always win.
func (c *TelemetryConfig) Merge(o *TelemetryConfig) *TelemetryConfig {
	if c == nil {
		if o == nil {
			return nil
		}
		return o.Copy()
	}
	if o == nil {
		return c.Copy()
	}

	r := c.Copy()

	// str overwrites *dst only when the overriding value is set.
	str := func(dst *string, v string) {
		if v != "" {
			*dst = v
		}
	}
	// list replaces *dst with a private copy of v when v is non-empty, so the
	// result never shares a backing array with o.
	list := func(dst *[]string, v []string) {
		if len(v) != 0 {
			*dst = append([]string(nil), v...)
		}
	}

	r.Disable = o.Disable
	str(&r.CirconusAPIApp, o.CirconusAPIApp)
	str(&r.CirconusAPIToken, o.CirconusAPIToken)
	str(&r.CirconusAPIURL, o.CirconusAPIURL)
	str(&r.CirconusBrokerID, o.CirconusBrokerID)
	str(&r.CirconusBrokerSelectTag, o.CirconusBrokerSelectTag)
	str(&r.CirconusCheckDisplayName, o.CirconusCheckDisplayName)
	r.CirconusCheckForceMetricActivation = o.CirconusCheckForceMetricActivation
	str(&r.CirconusCheckID, o.CirconusCheckID)
	str(&r.CirconusCheckInstanceID, o.CirconusCheckInstanceID)
	str(&r.CirconusCheckSearchTag, o.CirconusCheckSearchTag)
	str(&r.CirconusCheckTags, o.CirconusCheckTags)
	str(&r.CirconusSubmissionInterval, o.CirconusSubmissionInterval)
	str(&r.CirconusSubmissionURL, o.CirconusSubmissionURL)
	r.DisableHostname = o.DisableHostname
	str(&r.DogstatsdAddr, o.DogstatsdAddr)
	list(&r.DogstatsdTags, o.DogstatsdTags)
	r.FilterDefault = o.FilterDefault
	list(&r.AllowedPrefixes, o.AllowedPrefixes)
	list(&r.BlockedPrefixes, o.BlockedPrefixes)
	str(&r.MetricsPrefix, o.MetricsPrefix)
	str(&r.StatsdAddr, o.StatsdAddr)
	str(&r.StatsiteAddr, o.StatsiteAddr)

	if o.PrometheusRetentionTime.Nanoseconds() > 0 {
		r.PrometheusRetentionTime = o.PrometheusRetentionTime
	}
	if o.PrometheusPort != 0 {
		r.PrometheusPort = o.PrometheusPort
	}

	return r
}

// GoString returns a debug representation of the configuration. The Circonus
// API token is a secret and is never printed: the field is rendered empty when
// no token is configured and as "<hidden>" when one is.
func (c *TelemetryConfig) GoString() string {
	if c == nil {
		return "(*TelemetryConfig)(nil)"
	}

	// Only reveal whether a token is present, never its value.
	circonusAPITokenState := ""
	if c.CirconusAPIToken != "" {
		circonusAPITokenState = "<hidden>"
	}

	return fmt.Sprintf("&TelemetryConfig{"+
		"Disable:%v, "+
		"CirconusAPIApp:%s, "+
		"CirconusAPIToken:%s, "+
		"CirconusAPIURL:%s, "+
		"CirconusBrokerID:%s, "+
		"CirconusBrokerSelectTag:%s, "+
		"CirconusCheckDisplayName:%s, "+
		"CirconusCheckForceMetricActivation:%s, "+
		"CirconusCheckID:%s, "+
		"CirconusCheckInstanceID:%s, "+
		"CirconusCheckSearchTag:%s, "+
		"CirconusCheckTags:%s, "+
		"CirconusSubmissionInterval:%s, "+
		"CirconusSubmissionURL:%s, "+
		"DisableHostname:%v, "+
		"DogstatsdAddr:%s, "+
		"DogstatsdTags:%v, "+
		"FilterDefault:%v, "+
		"AllowedPrefixes:%s, "+
		"BlockedPrefixes:%s, "+
		"MetricsPrefix:%s, "+
		"StatsdAddr:%s, "+
		"StatsiteAddr:%s, "+
		"PrometheusPort:%d, "+
		"PrometheusRetentionTime:%s}",
		c.Disable,
		c.CirconusAPIApp,
		circonusAPITokenState,
		c.CirconusAPIURL,
		c.CirconusBrokerID,
		c.CirconusBrokerSelectTag,
		c.CirconusCheckDisplayName,
		c.GetCirconusCheckForceMetricActivation(),
		c.CirconusCheckID,
		c.CirconusCheckInstanceID,
		c.CirconusCheckSearchTag,
		c.CirconusCheckTags,
		c.CirconusSubmissionInterval,
		c.CirconusSubmissionURL,
		c.DisableHostname,
		c.DogstatsdAddr,
		c.DogstatsdTags,
		c.FilterDefault,
		c.AllowedPrefixes,
		c.BlockedPrefixes,
		c.MetricsPrefix,
		c.StatsdAddr,
		c.StatsiteAddr,
		c.PrometheusPort,
		c.PrometheusRetentionTime,
	)
}

// Finalize fills in defaults: MetricsPrefix falls back to
// defaultMetricsPrefix, the metrics prefix is added to AllowedPrefixes, and a
// non-positive PrometheusRetentionTime becomes 60 seconds. It is a no-op on a
// nil receiver and safe to call more than once.
func (c *TelemetryConfig) Finalize() {
	if c == nil {
		return
	}
	if c.MetricsPrefix == "" {
		c.MetricsPrefix = defaultMetricsPrefix
	}

	// Add the prefix only when it is missing so that repeated finalization
	// does not pile up duplicate entries.
	present := false
	for _, p := range c.AllowedPrefixes {
		if p == c.MetricsPrefix {
			present = true
			break
		}
	}
	if !present {
		c.AllowedPrefixes = append(c.AllowedPrefixes, c.MetricsPrefix)
	}

	if c.PrometheusRetentionTime.Nanoseconds() < 1 {
		c.PrometheusRetentionTime = 60 * time.Second
	}
}

// GetCirconusCheckForceMetricActivation returns the
// CirconusCheckForceMetricActivation flag formatted as "true" or "false".
func (c *TelemetryConfig) GetCirconusCheckForceMetricActivation() string {
	return strconv.FormatBool(c.CirconusCheckForceMetricActivation)
}
&TelemetryConfig{ + MetricsPrefix: "prefix", + Disable: true, + AllowedPrefixes: []string{"allowedPrefixA"}, + StatsdAddr: "statsA", + } + + partialConfigB := &TelemetryConfig{ + MetricsPrefix: "new_prefix", + Disable: false, + BlockedPrefixes: []string{"prefix"}, + } + + configC := partialConfigA.Merge(partialConfigB) + require.NotEqual(t, configC, partialConfigB) + + require.Equal(t, "new_prefix", configC.MetricsPrefix) + require.False(t, configC.Disable) + require.Equal(t, []string{"allowedPrefixA"}, configC.AllowedPrefixes) + require.Equal(t, []string{"prefix"}, configC.BlockedPrefixes) + require.Equal(t, "statsA", configC.StatsdAddr) +} + +func TestTelemetryFullConfigMerge(t *testing.T) { + configA := &TelemetryConfig{ + Disable: false, + CirconusAPIApp: "appA", + CirconusAPIToken: "tokenA", + CirconusAPIURL: "apiUrlA", + CirconusBrokerID: "brokerA", + CirconusBrokerSelectTag: "brokerTagA", + CirconusCheckDisplayName: "A", + CirconusCheckForceMetricActivation: false, + CirconusCheckID: "idA", + CirconusCheckInstanceID: "instanceA", + CirconusCheckSearchTag: "searchTagA", + CirconusCheckTags: "tagA", + CirconusSubmissionInterval: "1ms", + CirconusSubmissionURL: "urlA", + DisableHostname: false, + DogstatsdAddr: "addrA", + DogstatsdTags: []string{"dsTagA1", "dsTagA2"}, + FilterDefault: false, + AllowedPrefixes: []string{"allowedPrefixA"}, + BlockedPrefixes: []string{"blockedPrefixA"}, + MetricsPrefix: "prefixA", + StatsdAddr: "statsA", + StatsiteAddr: "statsiteA", + PrometheusPort: 8080, + PrometheusRetentionTime: 2 * time.Hour, + } + + configB := &TelemetryConfig{ + Disable: true, + CirconusAPIApp: "appB", + CirconusAPIToken: "tokenB", + CirconusAPIURL: "apiUrlB", + CirconusBrokerID: "brokerB", + CirconusBrokerSelectTag: "brokerTagB", + CirconusCheckDisplayName: "B", + CirconusCheckForceMetricActivation: true, + CirconusCheckID: "idB", + CirconusCheckInstanceID: "instanceB", + CirconusCheckSearchTag: "searchTagB", + CirconusCheckTags: "tagB", + 
CirconusSubmissionInterval: "1ms", + CirconusSubmissionURL: "urlB", + DisableHostname: true, + DogstatsdAddr: "addrB", + DogstatsdTags: []string{"dsTagB3"}, + FilterDefault: true, + AllowedPrefixes: []string{"allowedPrefixB"}, + BlockedPrefixes: []string{"blockedPrefixB"}, + MetricsPrefix: "prefixB", + StatsdAddr: "statsB", + StatsiteAddr: "statsiteB", + PrometheusPort: 9090, + PrometheusRetentionTime: 10 * time.Minute, + } + + assert.Equal(t, configB, configA.Merge(configB)) +} + +func TestTelemetryConfigGoString(t *testing.T) { + config := &TelemetryConfig{ + PrometheusRetentionTime: 1 * time.Minute, + } + expected := "&TelemetryConfig{Disable:false, CirconusAPIApp:, CirconusAPIToken:, CirconusAPIURL:, CirconusBrokerID:, CirconusBrokerSelectTag:, CirconusCheckDisplayName:, CirconusCheckForceMetricActivation:false, CirconusCheckID:, CirconusCheckInstanceID:, CirconusCheckSearchTag:, CirconusCheckTags:, CirconusSubmissionInterval:, CirconusSubmissionURL:, DisableHostname:false, DogstatsdAddr:, DogstatsdTags:[], FilterDefault:false, AllowedPrefixes:[], BlockedPrefixes:[], MetricsPrefix:, StatsdAddr:, StatsiteAddr:, PrometheusPort:0, PrometheusRetentionTime:1m0s}" + + assert.Equal(t, expected, config.GoString()) +} diff --git a/dependency/dependency.go b/dependency/dependency.go index 29ed744ad..385012173 100644 --- a/dependency/dependency.go +++ b/dependency/dependency.go @@ -49,6 +49,21 @@ const ( DefaultContextTimeout = 60 * time.Second ) +func (t Type) String() string { + switch t { + case TypeConsul: + return "consul" + case TypeVault: + return "vault" + case TypeLocal: + return "local" + case TypeNomad: + return "nomad" + default: + return "unknown" + } +} + // Dependency is an interface for a dependency that Consul Template is capable // of watching. 
type Dependency interface { diff --git a/docs/configuration.md b/docs/configuration.md index 3f9092c20..2842a0865 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -735,3 +735,52 @@ exec { [consul-kv]: https://www.consul.io/docs/agent/kv.html "Consul KV" [nomad]: https://www.nomadproject.io/ "Nomad by HashiCorp" [vault]: https://www.vaultproject.io/ "Vault by HashiCorp" + +## Telemetry + +This block is an HCL mapping to [armon/go-metrics](https://github.com/armon/go-metrics) sink configurations for +various exporters. Configuring telemetry is only supported in +configuration files and not as CLI flags. Only one metric provider can +be used at a given time. More details on the metrics collected can be found +in the [Telemetry](../README.md#telemetry) section of the README. + +```hcl +# These are example configurations for monitoring Consul Template metrics. +telemetry { + # General configuration + disable_hostname = true + filter_default = true + allowed_prefixes = ["consul_template."] + blocked_prefixes = ["go."] + metrics_prefix = "consul_template" + + # If you use Circonus + circonus_api_app = "consul" + circonus_api_token = "Your token" + circonus_api_url = "https://api.circonus.com/v2" + circonus_broker_id = "Your broker ID" + circonus_broker_select_tag = "dc:sfo" + circonus_check_display_name = "Your chosen display name" + circonus_check_force_metric_activation = false + circonus_check_id = "Your check ID" + circonus_check_instance_id = "hostname:app" + circonus_check_search_tag = "service:consul" + circonus_check_tags = "service:consul" + circonus_submission_interval = "10s" + circonus_submission_url = "https://a.circonus.submission.url.com" + + # If you use DogStatsD + dogstatsd_addr = "udp://127.0.0.1:8125" + dogstatsd_tags = ["my_tag_name:my_tag_value"] + + # If you use statsd + statsd_address = "statsd.company.local:8125" + + # If you use statsite + statsite_address = "statsite.company.local:8125" + + # If you use Prometheus + prometheus_retention_time = "" + prometheus_port = 8888 +} +``` diff --git a/go.mod b/go.mod index 
28da4b3fd..43ec0ef98 100644 --- a/go.mod +++ b/go.mod @@ -33,19 +33,28 @@ require ( require ( dario.cat/mergo v1.0.0 github.com/Masterminds/sprig/v3 v3.2.3 + github.com/armon/go-metrics v0.4.1 github.com/hashicorp/vault/api/auth/kubernetes v0.5.0 + github.com/prometheus/client_golang v1.5.0 golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 golang.org/x/text v0.14.0 ) +require google.golang.org/protobuf v1.33.0 // indirect + require ( + github.com/DataDog/datadog-go v3.2.0+incompatible // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.2.0 // indirect - github.com/armon/go-metrics v0.4.1 // indirect + github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v3 v3.2.2 // indirect + github.com/cespare/xxhash/v2 v2.1.1 // indirect + github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible // indirect + github.com/circonus-labs/circonusllhist v0.1.3 // indirect github.com/coreos/go-systemd/v22 v22.5.0 github.com/fatih/color v1.17.0 // indirect github.com/go-jose/go-jose/v3 v3.0.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/uuid v1.3.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/hashicorp/cronexpr v1.1.1 // indirect @@ -62,13 +71,18 @@ require ( github.com/imdario/mergo v0.3.11 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/miekg/dns v1.1.50 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_model v0.2.0 // indirect + github.com/prometheus/common v0.9.1 // indirect + github.com/prometheus/procfs v0.0.10 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect github.com/shopspring/decimal v1.3.1 // indirect 
github.com/spf13/cast v1.5.0 // indirect + github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926 // indirect golang.org/x/net v0.24.0 // indirect golang.org/x/time v0.3.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index c3a330640..1ee936c98 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,7 @@ dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/DataDog/datadog-go v3.2.0+incompatible h1:qSG2N4FghB1He/r2mFrWKCaL7dXCilEuNEeAn20fdD4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= @@ -21,13 +22,17 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/cenkalti/backoff/v3 v3.0.0/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= github.com/cenkalti/backoff/v3 v3.2.2 h1:cfUAAO3yvKMYKPrvhDuHSwQnhZNk/RMHKdZqKTxfm6M= github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= +github.com/cespare/xxhash/v2 v2.1.1 
h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible h1:C29Ae4G5GtYyYMm1aztcyj/J5ckgJm2zwdDajFbx1NY= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3 h1:TJH+oke8D16535+jHExHj4nQvzlZrj7ug5D7I/orNUA= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= @@ -110,8 +115,6 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.6.6/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= -github.com/hashicorp/go-retryablehttp v0.7.2 h1:AcYqCvkpalPnPF2pn0KamgwamS42TqUDDYFRKq/RAd0= -github.com/hashicorp/go-retryablehttp v0.7.2/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8= github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= @@ -185,6 +188,7 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 
+github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= @@ -229,14 +233,20 @@ github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSg github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_golang v1.5.0 h1:Ctq0iGpCmr3jeP77kbF2UxgvRwzWWz+4Bh9/vJTyg1A= +github.com/prometheus/client_golang v1.5.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1 h1:KOMtN28tlbam3/7ZKEYKHhKoJZYYj3gMH4uc62x7X7U= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= 
+github.com/prometheus/procfs v0.0.10 h1:QJQN3jYQhkamO4mhfUWqdDH2asK7ONOI9MTWjyAxNKM= +github.com/prometheus/procfs v0.0.10/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= @@ -268,6 +278,7 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926 h1:G3dpKMzFDjgEh2q1Z7zUUtKa8ViPtH+ocF0bE0g00O8= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= diff --git a/manager/runner.go b/manager/runner.go index 8c16c727b..51b06ed22 100644 --- a/manager/runner.go +++ b/manager/runner.go @@ -20,6 +20,7 @@ import ( "github.com/hashicorp/consul-template/config" dep "github.com/hashicorp/consul-template/dependency" "github.com/hashicorp/consul-template/renderer" + "github.com/hashicorp/consul-template/telemetry" "github.com/hashicorp/consul-template/template" "github.com/hashicorp/consul-template/watch" @@ -256,6 +257,8 @@ func NewRunner(config *config.Config, dry bool) (*Runner, error) { func (r *Runner) Start() { log.Printf("[INFO] (runner) starting") + telemetry.CounterActions.Add(1, telemetry.NewLabel("action", "start")) + // Create the pid before doing anything. 
if err := r.storePid(); err != nil { r.ErrCh <- err @@ -540,6 +543,9 @@ func (r *Runner) internalStop(immediately bool) { } log.Printf("[INFO] (runner) stopping") + + telemetry.CounterActions.Add(1, telemetry.NewLabel("action", "stop")) + r.stopDedup() r.stopWatchers() r.stopChild(immediately) @@ -608,9 +614,16 @@ func (r *Runner) Receive(d dep.Dependency, data interface{}) { // https://github.com/hashicorp/consul-template/issues/198 // // and by "little" bug, I mean really big bug. - if _, ok := r.dependencies[d.String()]; ok { + depID := d.String() + if _, ok := r.dependencies[depID]; ok { log.Printf("[DEBUG] (runner) receiving dependency %s", d) r.brain.Remember(d, data) + + telemetry.CounterDependenciesReceived.Add( + 1, + telemetry.NewLabel("id", depID), + telemetry.NewLabel("type", d.Type().String()), + ) } } @@ -634,6 +647,7 @@ func (r *Runner) Signal(s os.Signal) error { // executed. func (r *Runner) Run() error { log.Printf("[DEBUG] (runner) initiating run") + telemetry.CounterActions.Add(1, telemetry.NewLabel("action", "run")) var newRenderEvent, wouldRenderAny, renderedAny bool runCtx := &templateRunCtx{ @@ -664,6 +678,15 @@ func (r *Runner) Run() error { if event.DidRender { renderedAny = true } + + label := getTelemetryLabel(event) + + // Report the template render event + telemetry.CounterTemplatesRendered.Add( + 1, + telemetry.NewLabel("id", tmpl.ID()), + telemetry.NewLabel("status", label), + ) } } @@ -696,6 +719,12 @@ func (r *Runner) Run() error { } } + // Report on number of commands executed and their statuses + numCommands := len(runCtx.commands) + failedCommands := len(errs) + telemetry.CounterCommandExecs.Add(float32(numCommands-failedCommands), telemetry.NewLabel("status", "success")) + telemetry.CounterCommandExecs.Add(float32(failedCommands), telemetry.NewLabel("status", "error")) + // Check if we need to deliver any rendered signals if wouldRenderAny || renderedAny { // Send the signal that a template got rendered @@ -1478,3 +1507,23 @@ 
func newWatcher(c *config.Config, clients *dep.ClientSet) *watch.Watcher { RetryFuncNomad: watch.RetryFunc(c.Nomad.Retry.RetryFunc()), }) } + +func getTelemetryLabel(event *RenderEvent) string { + var label string + + // Record that at least one template would have been rendered. + if event.WouldRender { + label = "would" + } + + // Record that at least one template was rendered. + if event.DidRender { + label = "rendered" + } + + if event.ForQuiescence { + label = "quiescence" + } + + return label +} diff --git a/telemetry/circonus.go b/telemetry/circonus.go new file mode 100644 index 000000000..a889678f7 --- /dev/null +++ b/telemetry/circonus.go @@ -0,0 +1,54 @@ +package telemetry + +import ( + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/circonus" + "github.com/hashicorp/consul-template/config" +) + +/* + methods extracted from Consul telemetry: + https://github.com/hashicorp/consul/blob/main/lib/telemetry.go#L274 +*/ + +func circonusSink(cfg *config.TelemetryConfig, _ string) (metrics.MetricSink, error) { + token := cfg.CirconusAPIToken + url := cfg.CirconusSubmissionURL + if token == "" && url == "" { + return nil, nil + } + + conf := &circonus.Config{} + conf.Interval = cfg.CirconusSubmissionInterval + conf.CheckManager.API.TokenKey = token + conf.CheckManager.API.TokenApp = cfg.CirconusAPIApp + conf.CheckManager.API.URL = cfg.CirconusAPIURL + conf.CheckManager.Check.SubmissionURL = url + conf.CheckManager.Check.ID = cfg.CirconusCheckID + conf.CheckManager.Check.ForceMetricActivation = cfg.GetCirconusCheckForceMetricActivation() + conf.CheckManager.Check.InstanceID = cfg.CirconusCheckInstanceID + conf.CheckManager.Check.SearchTag = cfg.CirconusCheckSearchTag + conf.CheckManager.Check.DisplayName = cfg.CirconusCheckDisplayName + conf.CheckManager.Check.Tags = cfg.CirconusCheckTags + conf.CheckManager.Broker.ID = cfg.CirconusBrokerID + conf.CheckManager.Broker.SelectTag = cfg.CirconusBrokerSelectTag + + if conf.CheckManager.Check.DisplayName == 
"" { + conf.CheckManager.Check.DisplayName = "Consul" + } + + if conf.CheckManager.API.TokenApp == "" { + conf.CheckManager.API.TokenApp = "consul" + } + + if conf.CheckManager.Check.SearchTag == "" { + conf.CheckManager.Check.SearchTag = "service:consul" + } + + sink, err := circonus.NewCirconusSink(conf) + if err != nil { + return nil, err + } + sink.Start() + return sink, nil +} diff --git a/telemetry/dogstatsd.go b/telemetry/dogstatsd.go new file mode 100644 index 000000000..d175fc1ba --- /dev/null +++ b/telemetry/dogstatsd.go @@ -0,0 +1,25 @@ +package telemetry + +import ( + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/datadog" + "github.com/hashicorp/consul-template/config" +) + +/* + methods extracted from Consul telemetry: + https://github.com/hashicorp/consul/blob/main/lib/telemetry.go#L248 +*/ + +func dogstatdSink(cfg *config.TelemetryConfig, hostname string) (metrics.MetricSink, error) { + addr := cfg.DogstatsdAddr + if addr == "" { + return nil, nil + } + sink, err := datadog.NewDogStatsdSink(addr, hostname) + if err != nil { + return nil, err + } + sink.SetTags(cfg.DogstatsdTags) + return sink, nil +} diff --git a/telemetry/metrics.go b/telemetry/metrics.go new file mode 100644 index 000000000..ae5899ec8 --- /dev/null +++ b/telemetry/metrics.go @@ -0,0 +1,51 @@ +package telemetry + +import ( + "github.com/armon/go-metrics" +) + +type CounterMetric struct { + Names []string + Description string + ConstLabels []metrics.Label +} + +func (m *CounterMetric) Add(val float32, labels ...metrics.Label) { + metrics.IncrCounterWithLabels(m.Names, val, labels) +} + +// Counters +var CounterDependenciesReceived = CounterMetric{ + Names: []string{"dependencies_received"}, + ConstLabels: []metrics.Label{}, + Description: "A counter of dependencies received with labels " + + "type=(consul|vault|local) and id=dependencyString", +} +var CounterTemplatesRendered = CounterMetric{ + Names: []string{"templates_rendered"}, + ConstLabels: []metrics.Label{}, + 
Description: "A counter of templates rendered with labels " + + "id=templateID and status=(rendered|would|quiescence)", +} + +var CounterActions = CounterMetric{ + Names: []string{"runner_actions"}, + ConstLabels: []metrics.Label{}, + Description: "A count of runner actions with labels action=(start|stop|run)", +} +var CounterCommandExecs = CounterMetric{ + Names: []string{"commands_exec"}, + ConstLabels: []metrics.Label{}, + Description: "The number of commands executed with labels status=(success|error)", +} + +func NewLabel(name string, value string) metrics.Label { + return metrics.Label{Name: name, Value: value} +} + +func InitMetrics() { + CounterDependenciesReceived.Add(0) + CounterTemplatesRendered.Add(0) + CounterActions.Add(0) + CounterCommandExecs.Add(0) +} diff --git a/telemetry/prometheus.go b/telemetry/prometheus.go new file mode 100644 index 000000000..10a9997e6 --- /dev/null +++ b/telemetry/prometheus.go @@ -0,0 +1,52 @@ +package telemetry + +import ( + "fmt" + "log" + "net/http" + + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/consul-template/config" + prom "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +/* + methods based on Consul telemetry: + https://github.com/hashicorp/consul/blob/main/lib/telemetry.go#L261 +*/ + +func PrometheusSink(cfg *config.TelemetryConfig, _ string) (metrics.MetricSink, error) { + if cfg.PrometheusPort == 0 { + return nil, nil + } + + sink, err := prometheus.NewPrometheusSinkFrom(prometheus.PrometheusOpts{ + Expiration: cfg.PrometheusRetentionTime, + }) + + if err != nil { + return nil, err + } + + runPrometheusMetricServer(cfg.PrometheusPort) + + return sink, nil +} + +func runPrometheusMetricServer(prometheusPort int) { + handlerOptions := promhttp.HandlerOpts{ + ErrorHandling: promhttp.ContinueOnError, + } + + go func() { + log.Println("[INFO] (prometheus) running prom server") + handler := 
promhttp.HandlerFor(prom.DefaultGatherer, handlerOptions) + http.Handle("/metrics", handler) + err := http.ListenAndServe(fmt.Sprintf(":%d", prometheusPort), nil) + if err != nil { + log.Printf("[ERROR] (prometheus) error thrown by the metric server: %v", err) + } + }() +} diff --git a/telemetry/statsd.go b/telemetry/statsd.go new file mode 100644 index 000000000..bac0d929b --- /dev/null +++ b/telemetry/statsd.go @@ -0,0 +1,19 @@ +package telemetry + +import ( + "github.com/armon/go-metrics" + "github.com/hashicorp/consul-template/config" +) + +/* + methods extracted from Consul telemetry: + https://github.com/hashicorp/consul/blob/main/lib/telemetry.go#L240 +*/ + +func statsdSink(cfg *config.TelemetryConfig, _ string) (metrics.MetricSink, error) { + addr := cfg.StatsdAddr + if addr == "" { + return nil, nil + } + return metrics.NewStatsdSink(addr) +} diff --git a/telemetry/statsite.go b/telemetry/statsite.go new file mode 100644 index 000000000..0088bdd0a --- /dev/null +++ b/telemetry/statsite.go @@ -0,0 +1,19 @@ +package telemetry + +import ( + "github.com/armon/go-metrics" + "github.com/hashicorp/consul-template/config" +) + +/* + methods extracted from Consul telemetry: + https://github.com/hashicorp/consul/blob/main/lib/telemetry.go#L232 +*/ + +func statsiteSink(cfg *config.TelemetryConfig, _ string) (metrics.MetricSink, error) { + addr := cfg.StatsiteAddr + if addr == "" { + return nil, nil + } + return metrics.NewStatsiteSink(addr) +} diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go new file mode 100644 index 000000000..99a933ac3 --- /dev/null +++ b/telemetry/telemetry.go @@ -0,0 +1,99 @@ +package telemetry + +/* + methods based on Consul telemetry: + https://github.com/hashicorp/consul/blob/main/lib/telemetry.go +*/ + +import ( + "context" + "net/http" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-multierror" + + "github.com/hashicorp/consul-template/config" +) + +// MetricsHandler provides an http.Handler for displaying 
metrics. +type MetricsHandler interface { + DisplayMetrics(resp http.ResponseWriter, req *http.Request) (interface{}, error) + Stream(ctx context.Context, encoder metrics.Encoder) +} + +type Telemetry struct { + Handler MetricsHandler + cancelFn context.CancelFunc +} + +func (tel *Telemetry) Stop() { + if tel.cancelFn != nil { + tel.cancelFn() + } +} + +func computeMetricsConfig(telemetryConf *config.TelemetryConfig) *metrics.Config { + metricsConf := metrics.DefaultConfig(telemetryConf.MetricsPrefix) + metricsConf.EnableHostname = !telemetryConf.DisableHostname + metricsConf.FilterDefault = telemetryConf.FilterDefault + metricsConf.AllowedPrefixes = telemetryConf.AllowedPrefixes + metricsConf.BlockedPrefixes = telemetryConf.BlockedPrefixes + return metricsConf +} + +func setupSinks(telemetryConf *config.TelemetryConfig, hostname string) (metrics.FanoutSink, error) { + var sinks metrics.FanoutSink + var errors *multierror.Error + addSink := func(fn func(*config.TelemetryConfig, string) (metrics.MetricSink, error)) { + s, err := fn(telemetryConf, hostname) + if err != nil { + errors = multierror.Append(errors, err) + return + } + if s != nil { + sinks = append(sinks, s) + } + } + + addSink(statsiteSink) + addSink(statsdSink) + addSink(dogstatdSink) + addSink(circonusSink) + addSink(PrometheusSink) + + return sinks, errors.ErrorOrNil() +} + +// Init configures the global go-metrics instance from the given telemetry +// config. When cfg.Disable is set it returns an empty Telemetry and no error. +// Otherwise it sets up the configured sinks plus an in-memory sink, and returns +// an error if any sink or the global metrics instance cannot be created. 
+func Init(cfg *config.TelemetryConfig) (*Telemetry, error) { + if cfg.Disable { + return &Telemetry{}, nil + } + + memSink := metrics.NewInmemSink(10*time.Second, time.Minute) + metrics.DefaultInmemSignal(memSink) + + metricsConf := computeMetricsConfig(cfg) + + sinks, errs := setupSinks(cfg, metricsConf.HostName) + if errs != nil { + return nil, errs + } + sinks = append(sinks, memSink) + + metricsServer, err := metrics.NewGlobal(metricsConf, sinks) + if err != nil { + return nil, err + } + + telemetry := &Telemetry{ + Handler: memSink, + cancelFn: metricsServer.Shutdown, + } + + return telemetry, nil +} diff --git a/telemetry/telemetry_test.go b/telemetry/telemetry_test.go new file mode 100644 index 000000000..65f797785 --- /dev/null +++ b/telemetry/telemetry_test.go @@ -0,0 +1,132 @@ +package telemetry + +import ( + "fmt" + "io" + "net" + "net/http" + "slices" + "strconv" + "strings" + "testing" + "time" + + "github.com/hashicorp/consul-template/config" + "github.com/stretchr/testify/require" +) + +func newCfg() *config.TelemetryConfig { + return &config.TelemetryConfig{ + StatsdAddr: "statsd.host:1234", + StatsiteAddr: "statsite.host:1234", + DogstatsdAddr: "mydog.host:8125", + } +} + +func TestConfigureSinks(t *testing.T) { + cfg := newCfg() + sinks, err := setupSinks(cfg, "") + require.Error(t, err) + // 2 of the 3 configured sinks are expected; the inmem sink is added by Init, not setupSinks + require.Equal(t, 2, len(sinks)) + + cfg = &config.TelemetryConfig{ + DogstatsdAddr: "", + } + _, err = setupSinks(cfg, "") + require.NoError(t, err) + +} + +func TestPrometheusMetrics(t *testing.T) { + + // expected metrics based on the first metric-related PR: + // https://github.com/hashicorp/consul-template/pull/1378/files#diff-b335630551682c19a781afebcf4d07bf978fb1f8ac04c6bf87428ed5106870f5R2680 + expectedMetrics := []string{ + "# HELP consul_template_commands_exec The number of commands executed with labels status=(success|error)", + "# TYPE consul_template_commands_exec counter", + 
"consul_template_commands_exec{status=\"error\"} 0", + "consul_template_commands_exec{status=\"success\"} 1", + "# HELP consul_template_dependencies_received A counter of dependencies received with labels type=(consul|vault|local) and id=dependencyString", + "# TYPE consul_template_dependencies_received counter", + "consul_template_dependencies_received{id=\"kv.block(hello)\",type=\"consul\"} 1", + "# HELP consul_template_runner_actions A count of runner actions with labels action=(start|stop|run)", + "# TYPE consul_template_runner_actions counter", + "consul_template_runner_actions{action=\"run\"} 2", + "consul_template_runner_actions{action=\"start\"} 1", + "# HELP consul_template_templates_rendered A counter of templates rendered with labels id=templateID and status=(rendered|would|quiescence)", + "# TYPE consul_template_templates_rendered counter", + "consul_template_templates_rendered{id=\"aadcafd7f28f1d9fc5e76ab2e029f844\",status=\"rendered\"} 1", + } + + l, err := net.Listen("tcp", ":0") + require.NoError(t, err) + + address := l.Addr().String() + portStr := address[strings.LastIndex(address, ":")+1:] + + err = l.Close() + require.NoError(t, err) + + port, err := strconv.Atoi(portStr) + require.NoError(t, err) + + cfg := config.TelemetryConfig{ + PrometheusRetentionTime: 60 * time.Second, + PrometheusPort: port, + } + + _, err = Init(&cfg) + require.NoError(t, err) + + CounterCommandExecs.Add(0, NewLabel("status", "error")) + CounterCommandExecs.Add(1, NewLabel("status", "success")) + CounterDependenciesReceived.Add(1, NewLabel("id", "kv.block(hello)"), NewLabel("type", "consul")) + CounterActions.Add(2, NewLabel("action", "run")) + CounterActions.Add(1, NewLabel("action", "start")) + CounterTemplatesRendered.Add(1, NewLabel("id", "aadcafd7f28f1d9fc5e76ab2e029f844"), NewLabel("status", "rendered")) + + httpClient := http.DefaultClient + + resp, err := httpClient.Get(fmt.Sprintf("http://localhost:%d/metrics", port)) + require.NoError(t, err) + + b, err := 
io.ReadAll(resp.Body) + require.NoError(t, err) + + actualMetrics := strings.Split(string(b), "\n") + + missingActualMetrics := []string{} + + prefixes := []string{"# HELP go_", "# TYPE go_", "go_", "# HELP process_", "# TYPE process_", "process_"} + for _, actualMetric := range actualMetrics { + if slices.ContainsFunc(prefixes, func(p string) bool { return strings.HasPrefix(actualMetric, p) }) || + actualMetric == "" { + continue + } + + contained := false + for _, expectedMetric := range expectedMetrics { + if actualMetric == expectedMetric { + contained = true + break + } + } + + if !contained { + missingActualMetrics = append(missingActualMetrics, actualMetric) + } + } + + t.Log(len(missingActualMetrics)) + require.Emptyf(t, missingActualMetrics, "The following metrics are missing:\n - %s", strings.Join(missingActualMetrics, "\n - ")) + +} + +func TestInitWithEmptyConfig(t *testing.T) { + cfg := &config.Config{} + cfg.Finalize() + _, err := Init(cfg.Telemetry) + require.NoError(t, err) + +} diff --git a/test/helpers.go b/test/helpers.go index f270a4d49..32b16b6cc 100644 --- a/test/helpers.go +++ b/test/helpers.go @@ -4,6 +4,7 @@ package test import ( + "log" "os" "strings" "sync" @@ -97,11 +98,14 @@ func (t *TestingTB) DoCleanup() { t.cleanup() } -func (*TestingTB) Failed() bool { return false } -func (*TestingTB) Logf(string, ...interface{}) {} -func (*TestingTB) Fatalf(string, ...interface{}) {} -func (*TestingTB) Name() string { return "TestingTB" } -func (*TestingTB) Helper() {} +func (*TestingTB) Failed() bool { return false } +func (*TestingTB) Logf(format string, v ...interface{}) { + log.Printf(format, v...) +} +func (*TestingTB) Fatalf(format string, v ...interface{}) { + log.Fatalf(format, v...) 
+} +func (*TestingTB) Name() string { return "TestingTB" } func (t *TestingTB) Cleanup(f func()) { t.Lock() defer t.Unlock() @@ -121,3 +125,4 @@ func (*TestingTB) Fatal(...any) {} func (*TestingTB) Log(...any) {} func (*TestingTB) Setenv(string, string) {} func (*TestingTB) TempDir() string { return "" } +func (*TestingTB) Helper() {}