From fb8268ba3d6bb9e09a45076841f9da231423184c Mon Sep 17 00:00:00 2001 From: Danila Balagansky Date: Mon, 17 Jul 2023 15:19:51 +0300 Subject: [PATCH] Add more fill variants --- doc/source/rest.j2 | 34 ++-- gnocchi/rest/aggregates/processor.py | 21 ++- gnocchi/rest/api.py | 5 +- .../gabbits/aggregates-with-metric-ids.yaml | 2 +- .../tests/functional/gabbits/aggregation.yaml | 153 +++++++++++++++++- gnocchi/utils.py | 11 ++ .../more_fill_options-7f2f1048d7cb097a.yaml | 6 + 7 files changed, 212 insertions(+), 20 deletions(-) create mode 100644 releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml diff --git a/doc/source/rest.j2 b/doc/source/rest.j2 index 9d023bb9c..c706751d7 100644 --- a/doc/source/rest.j2 +++ b/doc/source/rest.j2 @@ -793,12 +793,17 @@ Backfill ~~~~~~~~ The ability to fill in missing points from a subset of time series is supported -by specifying a `fill` value. Valid fill values include any float, `dropna` or -`null`. In the case of `null`, Gnocchi will compute the aggregation using only -the existing points. `dropna` is like `null` but remove NaN from the result. -The `fill` parameter will not backfill timestamps which contain no points in -any of the time series. Only timestamps which have datapoints in at least one -of the time series is returned. +by specifying a `fill` value. Valid fill values include any float, `dropna`, +`null`, `ffill`, `bfill`, `full_ffill` or `full_bfill`. In the case of `null`, +Gnocchi will compute the aggregation using only the existing points. `dropna` is +like `null` but removes NaN from the result. `ffill` fills NaN measures in one +metric with the previous non-NaN value, `bfill` fills NaN measures with the next non-NaN +value, so if the metric starts (or, in case of `bfill`, ends) with NaNs, those +wouldn't change and would be excluded from the resulting set, just like `dropna` +does. 
To fill the remaining NaN values, producing a metric with as many +timestamps as there are in all metrics combined, one can use `full_ffill` and +`full_bfill` variants. In the case of `full_ffill`, it applies a forward then backward +fill and for `full_bfill`, a backward then forward fill. {{ scenarios['get-aggregates-by-metric-ids-fill']['doc'] }} @@ -1039,12 +1044,17 @@ expects 100% overlap. If this percentage is not reached, an error is returned. boundary to the first or last timestamp common across all series. The ability to fill in missing points from a subset of time series is supported -by specifying a `fill` value. Valid fill values include any float, `dropna` or -`null`. In the case of `null`, Gnocchi will compute the aggregation using only -the existing points. `dropna` is like `null` but remove NaN from the result. -The `fill` parameter will not backfill timestamps which contain no points in -any of the time series. Only timestamps which have datapoints in at least one -of the time series is returned. +by specifying a `fill` value. Valid fill values include any float, `dropna`, +`null`, `ffill`, `bfill`, `full_ffill` or `full_bfill`. In the case of `null`, +Gnocchi will compute the aggregation using only the existing points. `dropna` is +like `null` but removes NaN from the result. `ffill` fills NaN measures in one +metric with the previous non-NaN value, `bfill` fills NaN measures with the next non-NaN +value, so if the metric starts (or, in case of `bfill`, ends) with NaNs, those +wouldn't change and would be excluded from the resulting set, just like `dropna` +does. To fill the remaining NaN values, producing a metric with as many +timestamps as there are in all metrics combined, one can use `full_ffill` and +`full_bfill` variants. In the case of `full_ffill`, it applies a forward then backward +fill and for `full_bfill`, a backward then forward fill. 
{{ scenarios['get-across-metrics-measures-by-metric-ids-fill']['doc'] }} diff --git a/gnocchi/rest/aggregates/processor.py b/gnocchi/rest/aggregates/processor.py index 2af385034..4bce4397f 100644 --- a/gnocchi/rest/aggregates/processor.py +++ b/gnocchi/rest/aggregates/processor.py @@ -157,7 +157,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None, return_inverse=True) # create nd-array (unique series x unique times) and fill - filler = (numpy.NaN if fill in [None, 'null', 'dropna'] + filler = (numpy.NaN if fill in [None, 'null', 'dropna', 'ffill', 'bfill', 'full_ffill', 'full_bfill'] else fill) val_grid = numpy.full((len(series[sampling]), len(times)), filler) start = 0 @@ -165,6 +165,21 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None, size = len(split) val_grid[i][indices[start:start + size]] = split['values'] start += size + + if fill == "ffill": + val_grid = utils.forward_fill(val_grid) + + if fill == "bfill": + val_grid = utils.backward_fill(val_grid) + + if fill == "full_ffill": + val_grid = utils.forward_fill(val_grid) + val_grid = utils.backward_fill(val_grid) + + if fill == "full_bfill": + val_grid = utils.backward_fill(val_grid) + val_grid = utils.forward_fill(val_grid) + values = val_grid.T if fill is None: @@ -200,7 +215,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None, output = {"aggregated": []} for sampling in sorted(result, reverse=True): granularity, times, values, references = result[sampling] - if fill == "dropna": + if fill in ("dropna", "ffill", "bfill", "full_ffill", "full_bfill"): pos = ~numpy.logical_or(numpy.isnan(values[0]), numpy.isinf(values[0])) v = values[0][pos] @@ -220,7 +235,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None, for sampling in sorted(result, reverse=True): granularity, times, values, references = result[sampling] for i, ref in enumerate(references): - if fill == "dropna": + if fill in ("dropna", "ffill", "bfill", "full_ffill", 
"full_bfill"): pos = ~numpy.logical_or(numpy.isnan(values[i]), numpy.isinf(values[i])) v = values[i][pos] diff --git a/gnocchi/rest/api.py b/gnocchi/rest/api.py index 6e23a4c8e..cbec5b575 100644 --- a/gnocchi/rest/api.py +++ b/gnocchi/rest/api.py @@ -1892,8 +1892,9 @@ def groupper(r): FillSchema = voluptuous.Schema( - voluptuous.Any(voluptuous.Coerce(float), "null", "dropna", - msg="Must be a float, 'dropna' or 'null'")) + voluptuous.Any(voluptuous.Coerce(float), "null", "dropna", "ffill", "bfill", + "full_ffill", "full_bfill", + msg="Must be a float, 'dropna', 'null', 'ffill', 'bfill', 'full_ffill' or 'full_bfill'")) # noqa def validate_qs(start=None, stop=None, granularity=None, diff --git a/gnocchi/tests/functional/gabbits/aggregates-with-metric-ids.yaml b/gnocchi/tests/functional/gabbits/aggregates-with-metric-ids.yaml index 5af23b458..8c1a97e6e 100644 --- a/gnocchi/tests/functional/gabbits/aggregates-with-metric-ids.yaml +++ b/gnocchi/tests/functional/gabbits/aggregates-with-metric-ids.yaml @@ -803,7 +803,7 @@ tests: $.code: 400 $.description.cause: "Argument value error" $.description.detail: "fill" - $.description.reason: "Must be a float, 'dropna' or 'null'" + $.description.reason: "Must be a float, 'dropna', 'null', 'ffill', 'bfill', 'full_ffill' or 'full_bfill'" - name: get rolling bad aggregate POST: /v1/aggregates diff --git a/gnocchi/tests/functional/gabbits/aggregation.yaml b/gnocchi/tests/functional/gabbits/aggregation.yaml index d85e85de9..5b57c8967 100644 --- a/gnocchi/tests/functional/gabbits/aggregation.yaml +++ b/gnocchi/tests/functional/gabbits/aggregation.yaml @@ -39,6 +39,30 @@ tests: archive_policy_name: low status: 201 + - name: create metric 3 + POST: /v1/metric + request_headers: + content-type: application/json + data: + archive_policy_name: low + status: 201 + + - name: create metric 4 + POST: /v1/metric + request_headers: + content-type: application/json + data: + archive_policy_name: low + status: 201 + + - name: create metric 5 
(empty one) + POST: /v1/metric + request_headers: + content-type: application/json + data: + archive_policy_name: low + status: 201 + - name: get metric list GET: /v1/metric @@ -66,6 +90,34 @@ tests: value: 5 status: 202 + - name: push measurements to metric 3 + POST: /v1/metric/$HISTORY['get metric list'].$RESPONSE['$[2].id']/measures + request_headers: + content-type: application/json + data: + - timestamp: "2023-11-21T08:47:09" + value: 19.5 + - timestamp: "2023-11-21T08:48:33" + value: 32 + - timestamp: "2023-11-21T08:49:23" + value: 11.3 + status: 202 + + - name: push measurements to metric 4 + POST: /v1/metric/$HISTORY['get metric list'].$RESPONSE['$[3].id']/measures + request_headers: + content-type: application/json + data: + - timestamp: "2023-11-21T08:46:34" + value: 1.2 + - timestamp: "2023-11-21T08:48:33" + value: 4.2 + - timestamp: "2023-11-21T08:49:23" + value: 3 + - timestamp: "2023-11-21T08:50:11" + value: 4.9 + status: 202 + - name: get measure aggregates by granularity not float GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=foobar status: 400 @@ -74,14 +126,21 @@ tests: GET: /v1/aggregation/metric?metric=foobar status: 400 - - name: GET measure aggregates by granularity with refresh + - name: GET measure aggregates by granularity with refresh for metric 1 and 2 GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=1&refresh=true response_json_paths: $: - ['2015-03-06T14:33:57+00:00', 1.0, 23.1] - ['2015-03-06T14:34:12+00:00', 1.0, 7.0] - - name: POST measure aggregates by granularity with refresh + - name: GET measure aggregates by granularity with refresh for metric 3 and 4 + GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[2].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[3].id']&granularity=1&refresh=true + 
response_json_paths: + $: + - ['2023-11-21T08:48:33+00:00', 1.0, 18.1] + - ['2023-11-21T08:49:23+00:00', 1.0, 7.15] + + - name: POST measure aggregates by granularity with refresh for metric 1 and 2 POST: /v1/aggregation/metric?granularity=1&refresh=true request_headers: content-type: application/json @@ -166,6 +225,96 @@ tests: - ['2015-03-06T14:34:12+00:00', 1.0, 7.0] - ['2015-03-06T14:35:12+00:00', 1.0, 2.5] + - name: get measure aggregates difference with default fill + POST: /v1/aggregates?granularity=1 + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))" + response_json_paths: + $.measures.aggregated: + - ['2023-11-21T08:48:33+00:00', 1.0, 27.8] + - ['2023-11-21T08:49:23+00:00', 1.0, 8.3] + + - name: get measure aggregates difference with fill "ffill" + POST: /v1/aggregates?granularity=1&fill=ffill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))" + response_json_paths: + $.measures.aggregated: + - ['2023-11-21T08:47:09+00:00', 1.0, 18.3] + - ['2023-11-21T08:48:33+00:00', 1.0, 27.8] + - ['2023-11-21T08:49:23+00:00', 1.0, 8.3] + - ['2023-11-21T08:50:11+00:00', 1.0, 6.4] + + - name: get measure aggregates difference with fill "bfill" + POST: /v1/aggregates?granularity=1&fill=bfill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))" + response_json_paths: + $.measures.aggregated: + - ['2023-11-21T08:46:34+00:00', 1.0, 18.3] + - ['2023-11-21T08:47:09+00:00', 1.0, 15.3] + - ['2023-11-21T08:48:33+00:00', 1.0, 27.8] + - ['2023-11-21T08:49:23+00:00', 1.0, 8.3] + + - name: get measure aggregates difference with fill "full_ffill" + POST: /v1/aggregates?granularity=1&fill=full_ffill + data: + operations: "(aggregate 
mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))" + response_json_paths: + $.measures.aggregated: + - ['2023-11-21T08:46:34+00:00', 1.0, 18.3] + - ['2023-11-21T08:47:09+00:00', 1.0, 18.3] + - ['2023-11-21T08:48:33+00:00', 1.0, 27.8] + - ['2023-11-21T08:49:23+00:00', 1.0, 8.3] + - ['2023-11-21T08:50:11+00:00', 1.0, 6.4] + + - name: get measure aggregates difference with fill "full_bfill" + POST: /v1/aggregates?granularity=1&fill=full_bfill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))" + response_json_paths: + $.measures.aggregated: + - ['2023-11-21T08:46:34+00:00', 1.0, 18.3] + - ['2023-11-21T08:47:09+00:00', 1.0, 15.3] + - ['2023-11-21T08:48:33+00:00', 1.0, 27.8] + - ['2023-11-21T08:49:23+00:00', 1.0, 8.3] + - ['2023-11-21T08:50:11+00:00', 1.0, 6.4] + + - name: get measure aggregates difference with default fill (one metric is emtpy) + POST: /v1/aggregates?granularity=1 + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))" + response_json_paths: + $.measures.aggregated: [] + + - name: get measure aggregates difference with fill "ffill" (one metric is emtpy) + POST: /v1/aggregates?granularity=1&fill=ffill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))" + response_json_paths: + $.measures.aggregated: [] + + - name: get measure aggregates difference with fill "bfill" (one metric is emtpy) + POST: /v1/aggregates?granularity=1&fill=bfill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))" + 
response_json_paths: + $.measures.aggregated: [] + + - name: get measure aggregates difference with fill "full_ffill" (one metric is emtpy) + POST: /v1/aggregates?granularity=1&fill=full_ffill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))" + response_json_paths: + $.measures.aggregated: [] + + - name: get measure aggregates difference with fill "full_bfill" (one metric is emtpy) + POST: /v1/aggregates?granularity=1&fill=full_bfill + data: + operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))" + response_json_paths: + $.measures.aggregated: [] + - name: get measure aggregates with bad fill GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=1&fill=asdf status: 400 diff --git a/gnocchi/utils.py b/gnocchi/utils.py index ee2f52820..7cadf84b0 100644 --- a/gnocchi/utils.py +++ b/gnocchi/utils.py @@ -361,3 +361,14 @@ def is_resource_revision_needed(resource, request_attributes): ".", resource, k, v, database_attribute) return True return False + + +def forward_fill(arr): + mask = numpy.isnan(arr) + idx = numpy.where(~mask, numpy.arange(mask.shape[1]), 0) + numpy.maximum.accumulate(idx, axis=1, out=idx) + return arr[numpy.arange(idx.shape[0])[:, None], idx] + + +def backward_fill(arr): + return forward_fill(arr[:, ::-1])[:, ::-1] diff --git a/releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml b/releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml new file mode 100644 index 000000000..c67b99e31 --- /dev/null +++ b/releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + Add `ffill`, `bfill`, `full_ffill` and `full_bfill` `fill` options, which + back- and/or forward-fill datapoints missing in 
one of the series in the + resulting set, see https://github.com/gnocchixyz/gnocchi/issues/1266.