Skip to content

Commit

Permalink
Add UTF-8 support in metric and label names (#689)
Browse files Browse the repository at this point in the history
Allow UTF-8 escaped characters in label and metric names

This changes the validation in general and also adds a new escaping scheme
for URL parameters (the `U__` encoding used to escape names for legacy systems,
which unfortunately is different from the base64 encoding we already use for
label _values_).

The behavior is opt-in via a flag because there are valid legacy names that 
could also be seen as encoded versions of new names with special UTF-8 characters.

Signed-off-by: Federico Torres <[email protected]>

---------

Signed-off-by: Federico Torres <[email protected]>
  • Loading branch information
fedetorres93 authored Oct 2, 2024
1 parent c48dde7 commit c408b8e
Show file tree
Hide file tree
Showing 4 changed files with 369 additions and 3 deletions.
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,39 @@ Examples:

/metrics/job/titan/name@base64/zqDPgc6_zrzOt864zrXPjc-C

### UTF-8 support for metric and label names

Newer versions of the Prometheus exposition formats (text and protobuf)
support the full UTF-8 character set in metric and label names. The
Pushgateway only accepts special characters in names if the command line
flag `--push.enable-utf8-names` is set.
To allow special characters in label names that are part of the URL path, the flag also enables a
[specific encoding mechanism](https://github.com/prometheus/proposals/blob/main/proposals/2023-08-21-utf8.md#text-escaping). This is similar to the base64 encoding for label _values_ described above,
but works differently in detail for technical and historical reasons. As before, client libraries
should usually take care of the encoding. It works as follows:

* A label name containing encoded characters starts with `U__`.
* All non-standard characters (i.e. characters other than letters, numbers, and underscores) are encoded as underscores surrounding their Unicode value, like `_1F60A_`.
* All pre-existing underscores are encoded as a double-underscore: `__`.
* If a label name starts with `U__` already, these characters have to be encoded as well, resulting in `U___55_____`. (That's `U__` + `_55_` (for `U`) + `__` + `__`).
* A label name starting with `U__` in its encoded form, but containing invalid sequences (e.g. `U__in_xxx_valid`), is left unchanged.

For example, the label `"foo.bar"="baz"` would be encoded like:

/metrics/job/example/U__foo_2e_bar/baz

This encoding is compatible with the base64 encoding for label values:

/metrics/job/example/U__foo_2e_bar@base64/YmF6

Note that this method has an unlikely edge case that is not handled properly:
A pusher unaware of the encoding mechanism might use a label name that is
also a valid encoded version of another label name.
E.g. if a pusher intends to use the label name `U__foo_2e_bar`, but doesn't
encode it as `U___55_____foo__2e__bar`, the Pushgateway will decode
`U__foo_2e_bar` to `foo.bar`. This is the main reason why the decoding is
opt-in via the `--push.enable-utf8-names` flag.

### `PUT` method

`PUT` is used to push a group of metrics. All metrics with the
Expand Down
316 changes: 316 additions & 0 deletions handler/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/go-kit/log"
"github.com/matttproud/golang_protobuf_extensions/pbutil"
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/proto"
Expand Down Expand Up @@ -436,6 +437,195 @@ func TestPush(t *testing.T) {
verifyMetricFamily(t, `name:"histogram_metric" type:HISTOGRAM metric:{histogram:{sample_count_float:20 sample_sum:99.23 schema:1 negative_span:{offset:0 length:2} negative_span:{offset:0 length:2} negative_count:2 negative_count:2 negative_count:-2 negative_count:0 positive_span:{offset:0 length:2} positive_span:{offset:0 length:2} positive_count:2 positive_count:2 positive_count:-2 positive_count:0}}`, mms.lastWriteRequest.MetricFamilies["histogram_metric"])
}

// TestPushUTF8 drives the Push handler with UTF-8 name validation and
// value-encoding escaping enabled. It covers three pushes:
//
//  1. text content with a `U__`-escaped label name in the URL labels,
//  2. the same push through the handler variant named handlerBase64, with
//     base64-encoded label values in the URL,
//  3. protobuf content with dotted metric names.
//
// In every case the escaped label name `U__dotted_2e_label_2e_name` is
// expected to show up in the write request as `dotted.label.name`.
func TestPushUTF8(t *testing.T) {
	// Both settings are package-level state shared with every other test.
	// Restore them in a defer so that an early t.Fatal (which still runs
	// deferred calls) cannot leak the UTF-8 configuration into later tests.
	model.NameValidationScheme = model.UTF8Validation
	EscapingScheme = model.ValueEncodingEscaping
	defer func() {
		model.NameValidationScheme = model.LegacyValidation
		EscapingScheme = model.NoEscaping
	}()

	mms := MockMetricStore{}
	handler := Push(&mms, false, true, false, logger)
	handlerBase64 := Push(&mms, false, true, true, logger)

	// Text-format payload shared by the first two cases: one plain metric
	// and one metric whose name and one of whose label names contain dots.
	const textBody = "some_metric 3.14\n{\"another.metric\",instance=\"testinstance\",job=\"testjob\",\"dotted.label.name\"=\"mylabelvalue\"} 42\n"

	// With job name, instance name, UTF-8 escaped label name in params, UTF-8 metric name and text content.
	mms.lastWriteRequest = storage.WriteRequest{}
	req, err := http.NewRequest(
		"POST", "http://example.org/",
		bytes.NewBufferString(textBody),
	)
	if err != nil {
		t.Fatal(err)
	}
	w := httptest.NewRecorder()

	params := map[string]string{
		"job":    "testjob",
		"labels": "/instance/testinstance/U__dotted_2e_label_2e_name/mylabelvalue",
	}

	handler(w, req.WithContext(ctxWithParams(params, req)))
	if expected, got := http.StatusOK, w.Code; expected != got {
		t.Errorf("Wanted status code %v, got %v.", expected, got)
	}
	if mms.lastWriteRequest.Timestamp.IsZero() {
		t.Errorf("Write request timestamp not set: %#v", mms.lastWriteRequest)
	}
	if expected, got := "testjob", mms.lastWriteRequest.Labels["job"]; expected != got {
		t.Errorf("Wanted job %v, got %v.", expected, got)
	}
	if expected, got := "testinstance", mms.lastWriteRequest.Labels["instance"]; expected != got {
		t.Errorf("Wanted instance %v, got %v.", expected, got)
	}
	if expected, got := "mylabelvalue", mms.lastWriteRequest.Labels["dotted.label.name"]; expected != got {
		t.Errorf("Wanted dotted.label.name %v, got %v.", expected, got)
	}
	// Note that sanitation hasn't happened yet, grouping labels not in request.
	verifyMetricFamily(t, `name:"some_metric" type:UNTYPED metric:{untyped:{value:3.14}}`, mms.lastWriteRequest.MetricFamilies["some_metric"])
	verifyMetricFamily(t, `name:"another.metric" type:UNTYPED metric:{label:{name:"instance" value:"testinstance"} label:{name:"job" value:"testjob"} label:{name:"dotted.label.name" value:"mylabelvalue"} untyped:{value:42}}`, mms.lastWriteRequest.MetricFamilies["another.metric"])

	// With base64-encoded label values, UTF-8 escaped label name in params, UTF-8 metric name and text content.
	mms.lastWriteRequest = storage.WriteRequest{}
	req, err = http.NewRequest(
		"POST", "http://example.org/",
		bytes.NewBufferString(textBody),
	)
	if err != nil {
		t.Fatal(err)
	}
	w = httptest.NewRecorder()
	params = map[string]string{
		"job":    "dGVzdC9qb2I=",                                                                          // job="test/job"
		"labels": "/instance@base64/dGVzdGluc3RhbmNl/U__dotted_2e_label_2e_name@base64/bXlsYWJlbHZhbHVl", // instance="testinstance", dotted.label.name="mylabelvalue"
	}
	handlerBase64(w, req.WithContext(ctxWithParams(params, req)))
	if expected, got := http.StatusOK, w.Code; expected != got {
		t.Errorf("Wanted status code %v, got %v.", expected, got)
	}
	if mms.lastWriteRequest.Timestamp.IsZero() {
		t.Errorf("Write request timestamp not set: %#v", mms.lastWriteRequest)
	}
	if expected, got := "test/job", mms.lastWriteRequest.Labels["job"]; expected != got {
		t.Errorf("Wanted job %v, got %v.", expected, got)
	}
	if expected, got := "testinstance", mms.lastWriteRequest.Labels["instance"]; expected != got {
		t.Errorf("Wanted instance %v, got %v.", expected, got)
	}
	if expected, got := "mylabelvalue", mms.lastWriteRequest.Labels["dotted.label.name"]; expected != got {
		t.Errorf("Wanted dotted.label.name %v, got %v.", expected, got)
	}
	// Note that sanitation hasn't happened yet, grouping labels not in request.
	verifyMetricFamily(t, `name:"some_metric" type:UNTYPED metric:{untyped:{value:3.14}}`, mms.lastWriteRequest.MetricFamilies["some_metric"])
	// Note that sanitation hasn't happened yet, so the job label is still as
	// in the push, not aligned to the grouping labels.
	verifyMetricFamily(t, `name:"another.metric" type:UNTYPED metric:{label:{name:"instance" value:"testinstance"} label:{name:"job" value:"testjob"} label:{name:"dotted.label.name" value:"mylabelvalue"} untyped:{value:42}}`, mms.lastWriteRequest.MetricFamilies["another.metric"])

	// With job name, instance name, UTF-8 escaped label name in params, UTF-8 metric names and protobuf content.
	mms.lastWriteRequest = storage.WriteRequest{}
	buf := &bytes.Buffer{}
	_, err = pbutil.WriteDelimited(buf, &dto.MetricFamily{
		Name: proto.String("some.metric"),
		Type: dto.MetricType_UNTYPED.Enum(),
		Metric: []*dto.Metric{
			{
				Untyped: &dto.Untyped{
					Value: proto.Float64(1.234),
				},
			},
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	_, err = pbutil.WriteDelimited(buf, &dto.MetricFamily{
		Name: proto.String("another.metric"),
		Type: dto.MetricType_UNTYPED.Enum(),
		Metric: []*dto.Metric{
			{
				Untyped: &dto.Untyped{
					Value: proto.Float64(3.14),
				},
			},
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	_, err = pbutil.WriteDelimited(buf, &dto.MetricFamily{
		Name: proto.String("histogram.metric"),
		Type: dto.MetricType_HISTOGRAM.Enum(),
		Metric: []*dto.Metric{
			{
				Histogram: &dto.Histogram{
					SampleCountFloat: proto.Float64(20),
					SampleSum:        proto.Float64(99.23),
					Schema:           proto.Int32(1),
					NegativeCount:    []float64{2, 2, -2, 0},
					PositiveCount:    []float64{2, 2, -2, 0},
					PositiveSpan: []*dto.BucketSpan{
						{
							Offset: proto.Int32(0),
							Length: proto.Uint32(2),
						},
						{
							Offset: proto.Int32(0),
							Length: proto.Uint32(2),
						},
					},
					NegativeSpan: []*dto.BucketSpan{
						{
							Offset: proto.Int32(0),
							Length: proto.Uint32(2),
						},
						{
							Offset: proto.Int32(0),
							Length: proto.Uint32(2),
						},
					},
				},
			},
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	req, err = http.NewRequest(
		"POST", "http://example.org/", buf,
	)
	if err != nil {
		t.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/vnd.google.protobuf; encoding=delimited; proto=io.prometheus.client.MetricFamily")
	w = httptest.NewRecorder()
	params = map[string]string{
		"job":    "testjob",
		"labels": "/instance/testinstance/U__dotted_2e_label_2e_name/mylabelvalue",
	}
	handler(w, req.WithContext(ctxWithParams(params, req)))
	if expected, got := http.StatusOK, w.Code; expected != got {
		t.Errorf("Wanted status code %v, got %v.", expected, got)
	}
	if mms.lastWriteRequest.Timestamp.IsZero() {
		t.Errorf("Write request timestamp not set: %#v", mms.lastWriteRequest)
	}
	if expected, got := "testjob", mms.lastWriteRequest.Labels["job"]; expected != got {
		t.Errorf("Wanted job %v, got %v.", expected, got)
	}
	if expected, got := "testinstance", mms.lastWriteRequest.Labels["instance"]; expected != got {
		t.Errorf("Wanted instance %v, got %v.", expected, got)
	}
	if expected, got := "mylabelvalue", mms.lastWriteRequest.Labels["dotted.label.name"]; expected != got {
		t.Errorf("Wanted dotted.label.name %v, got %v.", expected, got)
	}
	// Note that sanitation hasn't happened yet, grouping labels not in request
	// (applies to all three metric families below).
	verifyMetricFamily(t, `name:"some.metric" type:UNTYPED metric:{untyped:{value:1.234}}`, mms.lastWriteRequest.MetricFamilies["some.metric"])
	verifyMetricFamily(t, `name:"another.metric" type:UNTYPED metric:{untyped:{value:3.14}}`, mms.lastWriteRequest.MetricFamilies["another.metric"])
	verifyMetricFamily(t, `name:"histogram.metric" type:HISTOGRAM metric:{histogram:{sample_count_float:20 sample_sum:99.23 schema:1 negative_span:{offset:0 length:2} negative_span:{offset:0 length:2} negative_count:2 negative_count:2 negative_count:-2 negative_count:0 positive_span:{offset:0 length:2} positive_span:{offset:0 length:2} positive_count:2 positive_count:2 positive_count:-2 positive_count:0}}`, mms.lastWriteRequest.MetricFamilies["histogram.metric"])
}

func TestDelete(t *testing.T) {
mms := MockMetricStore{}
handler := Delete(&mms, false, logger)
Expand Down Expand Up @@ -525,6 +715,71 @@ func TestDelete(t *testing.T) {

}

// TestDeleteUTF8 drives the Delete handler with UTF-8 name validation and
// value-encoding escaping enabled. It issues two deletes, one with plain
// label values and one through the handler variant named handlerBase64 with
// base64-encoded label values, and checks that the escaped label name
// `U__dotted_2e_label_2e_name` arrives in the write request as
// `dotted.label.name`.
func TestDeleteUTF8(t *testing.T) {
	// Both settings are package-level state shared with every other test.
	// Restore them in a defer so the reset happens however the test exits.
	model.NameValidationScheme = model.UTF8Validation
	EscapingScheme = model.ValueEncodingEscaping
	defer func() {
		model.NameValidationScheme = model.LegacyValidation
		EscapingScheme = model.NoEscaping
	}()

	mms := MockMetricStore{}
	handler := Delete(&mms, false, logger)
	handlerBase64 := Delete(&mms, true, logger)
	req := &http.Request{}
	var params map[string]string

	// With job name, instance name and UTF-8 escaped label name.
	mms.lastWriteRequest = storage.WriteRequest{}
	w := httptest.NewRecorder()

	params = map[string]string{
		"job":    "testjob",
		"labels": "/instance/testinstance/U__dotted_2e_label_2e_name/mylabelvalue",
	}

	handler(w, req.WithContext(ctxWithParams(params, req)))
	if expected, got := http.StatusAccepted, w.Code; expected != got {
		t.Errorf("Wanted status code %v, got %v.", expected, got)
	}
	if mms.lastWriteRequest.Timestamp.IsZero() {
		t.Errorf("Write request timestamp not set: %#v", mms.lastWriteRequest)
	}
	if expected, got := "testjob", mms.lastWriteRequest.Labels["job"]; expected != got {
		t.Errorf("Wanted job %v, got %v.", expected, got)
	}
	if expected, got := "testinstance", mms.lastWriteRequest.Labels["instance"]; expected != got {
		t.Errorf("Wanted instance %v, got %v.", expected, got)
	}
	if expected, got := "mylabelvalue", mms.lastWriteRequest.Labels["dotted.label.name"]; expected != got {
		t.Errorf("Wanted dotted.label.name %v, got %v.", expected, got)
	}

	// With base64-encoded label values and UTF-8 escaped label name.
	mms.lastWriteRequest = storage.WriteRequest{}
	w = httptest.NewRecorder()

	params = map[string]string{
		"job":    "dGVzdC9qb2I=", // job="test/job"
		"labels": "/instance@base64/dGVzdGluc3RhbmNl/U__dotted_2e_label_2e_name@base64/bXlsYWJlbHZhbHVl",
	}

	handlerBase64(w, req.WithContext(ctxWithParams(params, req)))
	if expected, got := http.StatusAccepted, w.Code; expected != got {
		t.Errorf("Wanted status code %v, got %v.", expected, got)
	}
	if mms.lastWriteRequest.Timestamp.IsZero() {
		t.Errorf("Write request timestamp not set: %#v", mms.lastWriteRequest)
	}
	if expected, got := "test/job", mms.lastWriteRequest.Labels["job"]; expected != got {
		t.Errorf("Wanted job %v, got %v.", expected, got)
	}
	if expected, got := "testinstance", mms.lastWriteRequest.Labels["instance"]; expected != got {
		t.Errorf("Wanted instance %v, got %v.", expected, got)
	}
	if expected, got := "mylabelvalue", mms.lastWriteRequest.Labels["dotted.label.name"]; expected != got {
		t.Errorf("Wanted dotted.label.name %v, got %v.", expected, got)
	}
}

func TestSplitLabels(t *testing.T) {
scenarios := map[string]struct {
input string
Expand Down Expand Up @@ -575,6 +830,13 @@ func TestSplitLabels(t *testing.T) {
input: "/label_name1@base64/foo.bar/label_name2/label_value2",
expectError: true,
},
"regular label and UTF-8 escaped label name with legacy validation": {
input: "/label_name1/label_value1/U__label_2e_name2/label_value2",
expectedOutput: map[string]string{
"label_name1": "label_value1",
"U__label_2e_name2": "label_value2",
},
},
}

for name, scenario := range scenarios {
Expand Down Expand Up @@ -603,6 +865,60 @@ func TestSplitLabels(t *testing.T) {
}
}

// TestSplitLabelsUTF8 checks splitLabels with UTF-8 name validation and
// value-encoding escaping enabled: a `U__`-escaped label name in the input
// path is expected to come back unescaped (e.g. `U__label_2e_name2` as
// `label.name2`), both with plain and with base64-encoded label values.
func TestSplitLabelsUTF8(t *testing.T) {
	scenarios := map[string]struct {
		input          string
		expectError    bool
		expectedOutput map[string]string
	}{
		"regular label and UTF-8 escaped label name": {
			input: "/label_name1/label_value1/U__label_2e_name2/label_value2",
			expectedOutput: map[string]string{
				"label_name1": "label_value1",
				"label.name2": "label_value2",
			},
		},
		"encoded slash in both label values and UTF-8 escaped label name": {
			input: "/label_name1@base64/bGFiZWwvdmFsdWUx/U__label_2e_name2@base64/bGFiZWwvdmFsdWUy",
			expectedOutput: map[string]string{
				"label_name1": "label/value1",
				"label.name2": "label/value2",
			},
		},
	}

	// Both settings are package-level state shared with every other test.
	// Restore them in a defer so the reset happens however the test exits.
	model.NameValidationScheme = model.UTF8Validation
	EscapingScheme = model.ValueEncodingEscaping
	defer func() {
		model.NameValidationScheme = model.LegacyValidation
		EscapingScheme = model.NoEscaping
	}()

	for name, scenario := range scenarios {
		t.Run(name, func(t *testing.T) {
			parsed, err := splitLabels(scenario.input)
			if err != nil {
				if scenario.expectError {
					return // All good.
				}
				t.Fatalf("Got unexpected error: %s.", err)
			}
			// A scenario that demands an error must not pass silently
			// when splitLabels succeeds.
			if scenario.expectError {
				t.Fatalf("Expected an error for input %q but got none.", scenario.input)
			}
			for k, v := range scenario.expectedOutput {
				got, ok := parsed[k]
				if !ok {
					// Report a missing key once; comparing the zero
					// value would only produce a second, redundant error.
					t.Errorf("Expected to find %s=%q.", k, v)
					continue
				}
				if got != v {
					t.Errorf("Expected %s=%q but got %s=%q.", k, v, k, got)
				}
				delete(parsed, k)
			}
			// Whatever is left was not expected by the scenario.
			for k, v := range parsed {
				t.Errorf("Found unexpected label %s=%q.", k, v)
			}
		})
	}
}

func TestWipeMetricStore(t *testing.T) {
// Create MockMetricStore with a few GroupingKeyToMetricGroup metrics
// so they can be returned by GetMetricFamiliesMap() to later send write
Expand Down
Loading

0 comments on commit c408b8e

Please sign in to comment.