Skip to content

Commit

Permalink
Merge pull request #1310 from dbalagansky/master
Browse files Browse the repository at this point in the history
Add more fill variants
  • Loading branch information
tobias-urdin authored Nov 15, 2023
2 parents e7d5ab3 + fb8268b commit aa2594c
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 20 deletions.
34 changes: 22 additions & 12 deletions doc/source/rest.j2
Original file line number Diff line number Diff line change
Expand Up @@ -793,12 +793,17 @@ Backfill
~~~~~~~~

The ability to fill in missing points from a subset of time series is supported
by specifying a `fill` value. Valid fill values include any float, `dropna` or
`null`. In the case of `null`, Gnocchi will compute the aggregation using only
the existing points. `dropna` is like `null` but remove NaN from the result.
The `fill` parameter will not backfill timestamps which contain no points in
any of the time series. Only timestamps which have datapoints in at least one
of the time series is returned.
by specifying a `fill` value. Valid fill values include any float, `dropna`,
`null`, `ffill`, `bfill`, `full_ffill` or `full_bfill`. In the case of `null`,
Gnocchi will compute the aggregation using only the existing points. `dropna` is
like `null` but removes NaN from the result. `ffill` fills NaN measures in one
metric with the previous non-NaN value, and `bfill` fills NaN measures with the
next non-NaN value, so if the metric starts (or, in the case of `bfill`, ends)
with NaNs, those are left unchanged and are excluded from the resulting set,
just like `dropna` does. To fill the remaining NaN values, producing a metric
with as many timestamps as there are in all metrics combined, one can use the
`full_ffill` and `full_bfill` variants. `full_ffill` applies a forward then
backward fill, and `full_bfill` applies a backward then forward fill.

{{ scenarios['get-aggregates-by-metric-ids-fill']['doc'] }}

Expand Down Expand Up @@ -1039,12 +1044,17 @@ expects 100% overlap. If this percentage is not reached, an error is returned.
boundary to the first or last timestamp common across all series.

The ability to fill in missing points from a subset of time series is supported
by specifying a `fill` value. Valid fill values include any float, `dropna` or
`null`. In the case of `null`, Gnocchi will compute the aggregation using only
the existing points. `dropna` is like `null` but remove NaN from the result.
The `fill` parameter will not backfill timestamps which contain no points in
any of the time series. Only timestamps which have datapoints in at least one
of the time series is returned.
by specifying a `fill` value. Valid fill values include any float, `dropna`,
`null`, `ffill`, `bfill`, `full_ffill` or `full_bfill`. In the case of `null`,
Gnocchi will compute the aggregation using only the existing points. `dropna` is
like `null` but removes NaN from the result. `ffill` fills NaN measures in one
metric with the previous non-NaN value, and `bfill` fills NaN measures with the
next non-NaN value, so if the metric starts (or, in the case of `bfill`, ends)
with NaNs, those are left unchanged and are excluded from the resulting set,
just like `dropna` does. To fill the remaining NaN values, producing a metric
with as many timestamps as there are in all metrics combined, one can use the
`full_ffill` and `full_bfill` variants. `full_ffill` applies a forward then
backward fill, and `full_bfill` applies a backward then forward fill.

{{ scenarios['get-across-metrics-measures-by-metric-ids-fill']['doc'] }}

Expand Down
21 changes: 18 additions & 3 deletions gnocchi/rest/aggregates/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,29 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None,
return_inverse=True)

# create nd-array (unique series x unique times) and fill
filler = (numpy.NaN if fill in [None, 'null', 'dropna']
filler = (numpy.NaN if fill in [None, 'null', 'dropna', 'ffill', 'bfill', 'full_ffill', 'full_bfill']
else fill)
val_grid = numpy.full((len(series[sampling]), len(times)), filler)
start = 0
for i, split in enumerate(series[sampling]):
size = len(split)
val_grid[i][indices[start:start + size]] = split['values']
start += size

if fill == "ffill":
val_grid = utils.forward_fill(val_grid)

if fill == "bfill":
val_grid = utils.backward_fill(val_grid)

if fill == "full_ffill":
val_grid = utils.forward_fill(val_grid)
val_grid = utils.backward_fill(val_grid)

if fill == "full_bfill":
val_grid = utils.backward_fill(val_grid)
val_grid = utils.forward_fill(val_grid)

values = val_grid.T

if fill is None:
Expand Down Expand Up @@ -200,7 +215,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None,
output = {"aggregated": []}
for sampling in sorted(result, reverse=True):
granularity, times, values, references = result[sampling]
if fill == "dropna":
if fill in ("dropna", "ffill", "bfill", "full_ffill", "full_bfill"):
pos = ~numpy.logical_or(numpy.isnan(values[0]),
numpy.isinf(values[0]))
v = values[0][pos]
Expand All @@ -220,7 +235,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None,
for sampling in sorted(result, reverse=True):
granularity, times, values, references = result[sampling]
for i, ref in enumerate(references):
if fill == "dropna":
if fill in ("dropna", "ffill", "bfill", "full_ffill", "full_bfill"):
pos = ~numpy.logical_or(numpy.isnan(values[i]),
numpy.isinf(values[i]))
v = values[i][pos]
Expand Down
5 changes: 3 additions & 2 deletions gnocchi/rest/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,8 +1892,9 @@ def groupper(r):


FillSchema = voluptuous.Schema(
voluptuous.Any(voluptuous.Coerce(float), "null", "dropna",
msg="Must be a float, 'dropna' or 'null'"))
voluptuous.Any(voluptuous.Coerce(float), "null", "dropna", "ffill", "bfill",
"full_ffill", "full_bfill",
msg="Must be a float, 'dropna', 'null', 'ffill', 'bfill', 'full_ffill' or 'full_bfill'")) # noqa


def validate_qs(start=None, stop=None, granularity=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ tests:
$.code: 400
$.description.cause: "Argument value error"
$.description.detail: "fill"
$.description.reason: "Must be a float, 'dropna' or 'null'"
$.description.reason: "Must be a float, 'dropna', 'null', 'ffill', 'bfill', 'full_ffill' or 'full_bfill'"

- name: get rolling bad aggregate
POST: /v1/aggregates
Expand Down
153 changes: 151 additions & 2 deletions gnocchi/tests/functional/gabbits/aggregation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,30 @@ tests:
archive_policy_name: low
status: 201

- name: create metric 3
POST: /v1/metric
request_headers:
content-type: application/json
data:
archive_policy_name: low
status: 201

- name: create metric 4
POST: /v1/metric
request_headers:
content-type: application/json
data:
archive_policy_name: low
status: 201

- name: create metric 5 (empty one)
POST: /v1/metric
request_headers:
content-type: application/json
data:
archive_policy_name: low
status: 201

- name: get metric list
GET: /v1/metric

Expand Down Expand Up @@ -66,6 +90,34 @@ tests:
value: 5
status: 202

- name: push measurements to metric 3
POST: /v1/metric/$HISTORY['get metric list'].$RESPONSE['$[2].id']/measures
request_headers:
content-type: application/json
data:
- timestamp: "2023-11-21T08:47:09"
value: 19.5
- timestamp: "2023-11-21T08:48:33"
value: 32
- timestamp: "2023-11-21T08:49:23"
value: 11.3
status: 202

- name: push measurements to metric 4
POST: /v1/metric/$HISTORY['get metric list'].$RESPONSE['$[3].id']/measures
request_headers:
content-type: application/json
data:
- timestamp: "2023-11-21T08:46:34"
value: 1.2
- timestamp: "2023-11-21T08:48:33"
value: 4.2
- timestamp: "2023-11-21T08:49:23"
value: 3
- timestamp: "2023-11-21T08:50:11"
value: 4.9
status: 202

- name: get measure aggregates by granularity not float
GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=foobar
status: 400
Expand All @@ -74,14 +126,21 @@ tests:
GET: /v1/aggregation/metric?metric=foobar
status: 400

- name: GET measure aggregates by granularity with refresh
- name: GET measure aggregates by granularity with refresh for metric 1 and 2
GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=1&refresh=true
response_json_paths:
$:
- ['2015-03-06T14:33:57+00:00', 1.0, 23.1]
- ['2015-03-06T14:34:12+00:00', 1.0, 7.0]

- name: POST measure aggregates by granularity with refresh
- name: GET measure aggregates by granularity with refresh for metric 3 and 4
GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[2].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[3].id']&granularity=1&refresh=true
response_json_paths:
$:
- ['2023-11-21T08:48:33+00:00', 1.0, 18.1]
- ['2023-11-21T08:49:23+00:00', 1.0, 7.15]

- name: POST measure aggregates by granularity with refresh for metric 1 and 2
POST: /v1/aggregation/metric?granularity=1&refresh=true
request_headers:
content-type: application/json
Expand Down Expand Up @@ -166,6 +225,96 @@ tests:
- ['2015-03-06T14:34:12+00:00', 1.0, 7.0]
- ['2015-03-06T14:35:12+00:00', 1.0, 2.5]

- name: get measure aggregates difference with default fill
POST: /v1/aggregates?granularity=1
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
response_json_paths:
$.measures.aggregated:
- ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
- ['2023-11-21T08:49:23+00:00', 1.0, 8.3]

- name: get measure aggregates difference with fill "ffill"
POST: /v1/aggregates?granularity=1&fill=ffill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
response_json_paths:
$.measures.aggregated:
- ['2023-11-21T08:47:09+00:00', 1.0, 18.3]
- ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
- ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
- ['2023-11-21T08:50:11+00:00', 1.0, 6.4]

- name: get measure aggregates difference with fill "bfill"
POST: /v1/aggregates?granularity=1&fill=bfill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
response_json_paths:
$.measures.aggregated:
- ['2023-11-21T08:46:34+00:00', 1.0, 18.3]
- ['2023-11-21T08:47:09+00:00', 1.0, 15.3]
- ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
- ['2023-11-21T08:49:23+00:00', 1.0, 8.3]

- name: get measure aggregates difference with fill "full_ffill"
POST: /v1/aggregates?granularity=1&fill=full_ffill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
response_json_paths:
$.measures.aggregated:
- ['2023-11-21T08:46:34+00:00', 1.0, 18.3]
- ['2023-11-21T08:47:09+00:00', 1.0, 18.3]
- ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
- ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
- ['2023-11-21T08:50:11+00:00', 1.0, 6.4]

- name: get measure aggregates difference with fill "full_bfill"
POST: /v1/aggregates?granularity=1&fill=full_bfill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
response_json_paths:
$.measures.aggregated:
- ['2023-11-21T08:46:34+00:00', 1.0, 18.3]
- ['2023-11-21T08:47:09+00:00', 1.0, 15.3]
- ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
- ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
- ['2023-11-21T08:50:11+00:00', 1.0, 6.4]

- name: get measure aggregates difference with default fill (one metric is emtpy)
POST: /v1/aggregates?granularity=1
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
response_json_paths:
$.measures.aggregated: []

- name: get measure aggregates difference with fill "ffill" (one metric is emtpy)
POST: /v1/aggregates?granularity=1&fill=ffill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
response_json_paths:
$.measures.aggregated: []

- name: get measure aggregates difference with fill "bfill" (one metric is emtpy)
POST: /v1/aggregates?granularity=1&fill=bfill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
response_json_paths:
$.measures.aggregated: []

- name: get measure aggregates difference with fill "full_ffill" (one metric is emtpy)
POST: /v1/aggregates?granularity=1&fill=full_ffill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
response_json_paths:
$.measures.aggregated: []

- name: get measure aggregates difference with fill "full_bfill" (one metric is emtpy)
POST: /v1/aggregates?granularity=1&fill=full_bfill
data:
operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
response_json_paths:
$.measures.aggregated: []

- name: get measure aggregates with bad fill
GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=1&fill=asdf
status: 400
Expand Down
11 changes: 11 additions & 0 deletions gnocchi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,14 @@ def is_resource_revision_needed(resource, request_attributes):
".", resource, k, v, database_attribute)
return True
return False


def forward_fill(arr):
    """Fill NaN gaps in each row with the previous non-NaN value of that row.

    :param arr: 2-D numpy array; the fill runs along axis 1 (columns).
    :return: A new array of the same shape. NaNs at the start of a row,
             which have no earlier value to copy, are left as NaN. The
             input array is not modified.
    """
    is_valid = ~numpy.isnan(arr)
    column_ids = numpy.arange(is_valid.shape[1])
    # Each valid cell keeps its own column index; NaN cells get 0 so that
    # they never win the running maximum below.
    last_valid = numpy.where(is_valid, column_ids, 0)
    # The running maximum along a row yields, for every cell, the column
    # index of the most recent valid cell at or before it (0 if none yet).
    last_valid = numpy.maximum.accumulate(last_valid, axis=1)
    # Pair every row index with those column indices to gather the values.
    row_ids = numpy.arange(last_valid.shape[0])[:, None]
    return arr[row_ids, last_valid]


def backward_fill(arr):
    """Fill NaN gaps in each row with the next non-NaN value of that row.

    Works by reversing the column order, applying ``forward_fill`` and
    reversing the result back.

    :param arr: 2-D numpy array; the fill runs along axis 1 (columns).
    :return: Array of the same shape as ``arr`` with the gaps filled.
    """
    reversed_columns = arr[:, ::-1]
    filled = forward_fill(reversed_columns)
    # Undo the reversal so columns are back in their original order.
    return filled[:, ::-1]
6 changes: 6 additions & 0 deletions releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
features:
- |
Add `ffill`, `bfill`, `full_ffill` and `full_bfill` `fill` options, which
back- and/or forward-fill datapoints missing in one of the series in the
resulting set, see https://github.com/gnocchixyz/gnocchi/issues/1266.

0 comments on commit aa2594c

Please sign in to comment.