Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for native histograms in OM parser #1040

Merged
merged 11 commits into from
Sep 20, 2024
4 changes: 3 additions & 1 deletion prometheus_client/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
SummaryMetricFamily, UnknownMetricFamily, UntypedMetricFamily,
)
from .registry import CollectorRegistry, REGISTRY
from .samples import Exemplar, Sample, Timestamp
from .samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp

__all__ = (
'BucketSpan',
'CollectorRegistry',
'Counter',
'CounterMetricFamily',
Expand All @@ -21,6 +22,7 @@
'Info',
'InfoMetricFamily',
'Metric',
'NativeHistogram',
'REGISTRY',
'Sample',
'StateSetMetricFamily',
Expand Down
8 changes: 4 additions & 4 deletions prometheus_client/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ def describe(self) -> Iterable[Metric]:

def collect(self) -> Iterable[Metric]:
metric = self._get_metric()
for suffix, labels, value, timestamp, exemplar in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar)
for suffix, labels, value, timestamp, exemplar, native_histogram_value in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar, native_histogram_value)
return [metric]

def __str__(self) -> str:
Expand Down Expand Up @@ -246,8 +246,8 @@ def _multi_samples(self) -> Iterable[Sample]:
metrics = self._metrics.copy()
for labels, metric in metrics.items():
series_labels = list(zip(self._labelnames, labels))
for suffix, sample_labels, value, timestamp, exemplar in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar)
for suffix, sample_labels, value, timestamp, exemplar, native_histogram_value in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar, native_histogram_value)

def _child_samples(self) -> Iterable[Sample]: # pragma: no cover
raise NotImplementedError('_child_samples() must be implemented by %r' % self)
Expand Down
7 changes: 3 additions & 4 deletions prometheus_client/metrics_core.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from typing import Dict, List, Optional, Sequence, Tuple, Union

from .samples import Exemplar, Sample, Timestamp
from .samples import Exemplar, NativeHistogram, Sample, Timestamp

METRIC_TYPES = (
'counter', 'gauge', 'summary', 'histogram',
Expand Down Expand Up @@ -36,11 +36,11 @@ def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
self.type: str = typ
self.samples: List[Sample] = []

def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None) -> None:
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None:
"""Add a sample to the metric.

Internal-only, do not use."""
self.samples.append(Sample(name, labels, value, timestamp, exemplar))
self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram))

def __eq__(self, other: object) -> bool:
return (isinstance(other, Metric)
Expand Down Expand Up @@ -284,7 +284,6 @@ def add_metric(self,
Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))



class GaugeHistogramMetricFamily(Metric):
"""A single gauge histogram and its samples.

Expand Down
2 changes: 1 addition & 1 deletion prometheus_client/multiprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _accumulate_metrics(metrics, accumulate):
buckets = defaultdict(lambda: defaultdict(float))
samples_setdefault = samples.setdefault
for s in metric.samples:
name, labels, value, timestamp, exemplar = s
name, labels, value, timestamp, exemplar, native_histogram_value = s
if metric.type == 'gauge':
without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
if metric._multiprocess_mode in ('min', 'livemin'):
Expand Down
4 changes: 3 additions & 1 deletion prometheus_client/openmetrics/exposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
def _is_valid_exemplar_metric(metric, sample):
    """Return True if ``sample`` of ``metric`` is allowed to carry an exemplar.

    Accepted combinations:

    * ``counter`` metrics: samples whose name ends in ``_total``.
    * ``gaugehistogram`` metrics: samples whose name ends in ``_bucket``.
    * ``histogram`` metrics: samples whose name ends in ``_bucket``, or whose
      name is exactly the metric name (a suffix-less histogram sample).

    Everything else is rejected.

    The metric type is compared with ``==`` rather than ``in ('...')``: a
    parenthesised string is not a tuple, so ``in`` would perform a substring
    test (``'gauge' in 'gaugehistogram'`` is True). The histogram name checks
    are grouped explicitly because ``and`` binds tighter than ``or``; without
    the grouping, any metric type with a sample named like the metric would be
    accepted.
    """
    if metric.type == 'counter':
        return sample.name.endswith('_total')
    if metric.type == 'gaugehistogram':
        return sample.name.endswith('_bucket')
    if metric.type == 'histogram':
        return sample.name.endswith('_bucket') or sample.name == metric.name
    return False

Expand Down
156 changes: 133 additions & 23 deletions prometheus_client/openmetrics/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re

from ..metrics_core import Metric, METRIC_LABEL_NAME_RE
from ..samples import Exemplar, Sample, Timestamp
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
from ..utils import floatToGoString


Expand Down Expand Up @@ -364,6 +364,99 @@ def _parse_remaining_text(text):
return val, ts, exemplar


def _parse_nh_sample(text, suffixes):
    """Try to parse one exposition line as a native histogram sample.

    Two shapes are recognised:

    * ``name{labels} {value}`` - a native histogram with labels
    * ``name {value}`` - a native histogram without labels

    :param text: the sample line to inspect.
    :param suffixes: tuple of name suffixes that a native histogram sample
        name must not end with.
    :return: a ``Sample`` whose value, timestamp and exemplar are ``None`` and
        whose native histogram field holds the parsed value (labels are
        ``None`` for the label-less shape), or ``None`` when the line matches
        neither shape.
    :raises ValueError: if the sample name ends with one of ``suffixes``.
    """
    labelled = re.match(r'[^{} ]+{[^{}]+} {[^{}]+}$', text)
    unlabelled = re.match(r'^[^{} ]+ {[^{}]+}$', text)
    if not labelled and not unlabelled:
        # it's not a native histogram
        return None

    first_brace = text.find("{")
    if labelled:
        # The last "{" opens the histogram value; the two characters before
        # it are the "}" closing the labels and the separating space.
        value_start = text.rindex("{")
        labels = _parse_labels(text[first_brace + 1:value_start - 2])
        name = text[:first_brace]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
    else:
        # The only "{" opens the histogram value; the character before it is
        # the space separating it from the name.
        value_start = first_brace
        labels = None
        name = text[:value_start - 1]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram should have no suffixes", name)

    return Sample(name, labels, None, None, None, _parse_nh_struct(text[value_start:]))


# Scalar ``key:value`` fields of a native histogram value.
_NH_FIELD_RE = re.compile(r'(\w+):\s*([^,}]+)')
# ``positive_spans:[o:l,o:l,...]`` / ``negative_spans:[...]`` with one or more
# ``offset:length`` pairs; the offset may be negative.
_NH_SPANS_RE = re.compile(r'(positive_spans|negative_spans):\[(-?\d+:\d+(?:,-?\d+:\d+)*)\]')
# ``positive_deltas:[d,d,...]`` / ``negative_deltas:[...]`` with signed integers.
_NH_DELTAS_RE = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')


def _nh_int_or_float(text):
    """Convert ``text`` to ``int`` when it is an integer literal, else to ``float``.

    Raises ValueError if ``text`` is neither.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def _nh_spans_from_text(spans_text):
    """Turn ``"o:l,o:l,..."`` into a tuple of BucketSpan; None stays None."""
    if spans_text is None:
        return None
    pairs = (elem.split(':') for elem in spans_text.split(','))
    return tuple(BucketSpan(int(offset), int(length)) for offset, length in pairs)


def _nh_deltas_from_text(deltas_text):
    """Turn ``"d,d,..."`` into a tuple of ints; None stays None."""
    if deltas_text is None:
        return None
    return tuple(int(delta) for delta in deltas_text.split(','))


def _parse_nh_struct(text):
    """Parse the ``{...}`` value of a native histogram sample.

    The fields ``count``, ``sum``, ``schema``, ``zero_threshold`` and
    ``zero_count`` are required. ``positive_spans``, ``negative_spans``,
    ``positive_deltas`` and ``negative_deltas`` are optional and come back as
    ``None`` when they are absent.

    Span lists may hold any number (one or more) of ``offset:length`` pairs;
    a list of exactly two pairs yields the same two-element tuple as before.
    ``sum`` is returned as ``int`` when written as an integer and as ``float``
    otherwise, so non-integral sums no longer fail to parse.

    :param text: the native histogram value text, braces included.
    :return: a ``NativeHistogram`` built from the parsed fields.
    :raises KeyError: if a required field is missing.
    :raises ValueError: if a required field is not a valid number.
    """
    items = dict(_NH_FIELD_RE.findall(text))
    spans = dict(_NH_SPANS_RE.findall(text))
    deltas = dict(_NH_DELTAS_RE.findall(text))

    return NativeHistogram(
        count_value=int(items['count']),
        sum_value=_nh_int_or_float(items['sum']),
        schema=int(items['schema']),
        zero_threshold=float(items['zero_threshold']),
        zero_count=int(items['zero_count']),
        pos_spans=_nh_spans_from_text(spans.get('positive_spans')),
        neg_spans=_nh_spans_from_text(spans.get('negative_spans')),
        pos_deltas=_nh_deltas_from_text(deltas.get('positive_deltas')),
        neg_deltas=_nh_deltas_from_text(deltas.get('negative_deltas')),
    )


def _group_for_sample(sample, name, typ):
if typ == 'info':
# We can't distinguish between groups for info metrics.
Expand Down Expand Up @@ -406,6 +499,8 @@ def do_checks():
for s in samples:
suffix = s.name[len(name):]
g = _group_for_sample(s, name, 'histogram')
if len(suffix) == 0:
continue
if g != group or s.timestamp != timestamp:
if group is not None:
do_checks()
Expand Down Expand Up @@ -486,6 +581,8 @@ def build_metric(name, documentation, typ, unit, samples):
metric.samples = samples
return metric

is_nh = False
typ = None
for line in fd:
if line[-1] == '\n':
line = line[:-1]
Expand Down Expand Up @@ -518,7 +615,7 @@ def build_metric(name, documentation, typ, unit, samples):
group_timestamp_samples = set()
samples = []
allowed_names = [parts[2]]

if parts[1] == 'HELP':
if documentation is not None:
raise ValueError("More than one HELP for metric: " + line)
Expand All @@ -537,8 +634,18 @@ def build_metric(name, documentation, typ, unit, samples):
else:
raise ValueError("Invalid line: " + line)
else:
sample = _parse_sample(line)
if sample.name not in allowed_names:
if typ == 'histogram':
# set to true to account for native histograms naming exceptions/sanitizing differences
is_nh = True
sample = _parse_nh_sample(line, tuple(type_suffixes['histogram']))
csmarchbanks marked this conversation as resolved.
Show resolved Hide resolved
# It's not a native histogram
if sample is None:
is_nh = False
sample = _parse_sample(line)
else:
is_nh = False
sample = _parse_sample(line)
if sample.name not in allowed_names and not is_nh:
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
# Start an unknown metric.
Expand Down Expand Up @@ -570,26 +677,29 @@ def build_metric(name, documentation, typ, unit, samples):
or _isUncanonicalNumber(sample.labels['quantile']))):
raise ValueError("Invalid quantile label: " + line)

g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
if not is_nh:
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
else:
group_timestamp_samples = set()

series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)

group = g
group_timestamp = sample.timestamp
seen_groups.add(g)
else:
group_timestamp_samples = set()

series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)

group = g
group_timestamp = sample.timestamp
seen_groups.add(g)

if typ == 'stateset' and sample.value not in [0, 1]:
raise ValueError("Stateset samples can only have values zero and one: " + line)
Expand All @@ -606,7 +716,7 @@ def build_metric(name, documentation, typ, unit, samples):
(typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
or (typ in ['counter'] and sample.name.endswith('_total'))):
raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)

if name is not None:
yield build_metric(name, documentation, typ, unit, samples)

Expand Down
22 changes: 21 additions & 1 deletion prometheus_client/samples.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, NamedTuple, Optional, Union
from typing import Dict, NamedTuple, Optional, Sequence, Tuple, Union


class Timestamp:
Expand Down Expand Up @@ -34,6 +34,25 @@ def __lt__(self, other: "Timestamp") -> bool:
return self.nsec < other.nsec if self.sec == other.sec else self.sec < other.sec


# BucketSpan is experimental and subject to change at any time.
class BucketSpan(NamedTuple):
    """One ``offset:length`` pair from a native histogram span list."""
    # First number of the ``offset:length`` pair.
    offset: int
    # Second number of the ``offset:length`` pair.
    length: int


# NativeHistogram is experimental and subject to change at any time.
class NativeHistogram(NamedTuple):
    """Value of a native histogram sample.

    The span and delta fields are optional and default to ``None``. Span
    fields hold a tuple with any number of ``BucketSpan`` entries rather than
    exactly two, so histograms with one or with several spans can be
    represented.
    """
    # Parsed from the ``count`` field.
    count_value: float
    # Parsed from the ``sum`` field.
    sum_value: float
    # Parsed from the ``schema`` field.
    schema: int
    # Parsed from the ``zero_threshold`` field.
    zero_threshold: float
    # Parsed from the ``zero_count`` field.
    zero_count: float
    # Parsed from ``positive_spans`` / ``negative_spans``.
    pos_spans: Optional[Tuple[BucketSpan, ...]] = None
    neg_spans: Optional[Tuple[BucketSpan, ...]] = None
    # Parsed from ``positive_deltas`` / ``negative_deltas``.
    pos_deltas: Optional[Sequence[int]] = None
    neg_deltas: Optional[Sequence[int]] = None


# Timestamp and exemplar are optional.
# Value can be an int or a float.
# Timestamp can be a float containing a unixtime in seconds,
Expand All @@ -51,3 +70,4 @@ class Sample(NamedTuple):
value: float
timestamp: Optional[Union[float, Timestamp]] = None
exemplar: Optional[Exemplar] = None
native_histogram: Optional[NativeHistogram] = None
Loading