Merge pull request #1310 from dbalagansky/master

Add more fill variants
gnocchixyz · Nov 15, 2023 · aa2594c · aa2594c
2 parents e7d5ab3 + fb8268b
commit aa2594c
Show file tree

Hide file tree

Showing 7 changed files with 212 additions and 20 deletions.
diff --git a/doc/source/rest.j2 b/doc/source/rest.j2
@@ -793,12 +793,17 @@ Backfill
 ~~~~~~~~
 
 The ability to fill in missing points from a subset of time series is supported
-by specifying a `fill` value. Valid fill values include any float, `dropna` or
-`null`. In the case of `null`, Gnocchi will compute the aggregation using only
-the existing points. `dropna` is like `null` but remove NaN from the result.
-The `fill` parameter will not backfill timestamps which contain no points in
-any of the time series. Only timestamps which have datapoints in at least one
-of the time series is returned.
+by specifying a `fill` value. Valid fill values include any float, `dropna`,
+`null`, `ffill`, `bfill`, `full_ffill` or `full_bfill`. In the case of `null`,
+Gnocchi will compute the aggregation using only the existing points. `dropna` is
+like `null` but removes NaN from the result. `ffill` fills NaN measures in a
+metric with the previous non-NaN value and `bfill` fills them with the next
+non-NaN value; if the metric starts (or, in the case of `bfill`, ends) with NaNs,
+those are left unchanged and are excluded from the resulting set, just like
+`dropna` does. To fill the remaining NaN values as well, producing a metric with
+as many timestamps as there are in all metrics combined, use the `full_ffill` or
+`full_bfill` variants: `full_ffill` applies a forward then a backward fill, and
+`full_bfill` applies a backward then a forward fill.
 
 {{ scenarios['get-aggregates-by-metric-ids-fill']['doc'] }}
 
@@ -1039,12 +1044,17 @@ expects 100% overlap. If this percentage is not reached, an error is returned.
    boundary to the first or last timestamp common across all series.
 
 The ability to fill in missing points from a subset of time series is supported
-by specifying a `fill` value. Valid fill values include any float, `dropna` or
-`null`. In the case of `null`, Gnocchi will compute the aggregation using only
-the existing points. `dropna` is like `null` but remove NaN from the result.
-The `fill` parameter will not backfill timestamps which contain no points in
-any of the time series. Only timestamps which have datapoints in at least one
-of the time series is returned.
+by specifying a `fill` value. Valid fill values include any float, `dropna`,
+`null`, `ffill`, `bfill`, `full_ffill` or `full_bfill`. In the case of `null`,
+Gnocchi will compute the aggregation using only the existing points. `dropna` is
+like `null` but removes NaN from the result. `ffill` fills NaN measures in a
+metric with the previous non-NaN value and `bfill` fills them with the next
+non-NaN value; if the metric starts (or, in the case of `bfill`, ends) with NaNs,
+those are left unchanged and are excluded from the resulting set, just like
+`dropna` does. To fill the remaining NaN values as well, producing a metric with
+as many timestamps as there are in all metrics combined, use the `full_ffill` or
+`full_bfill` variants: `full_ffill` applies a forward then a backward fill, and
+`full_bfill` applies a backward then a forward fill.
 
 {{ scenarios['get-across-metrics-measures-by-metric-ids-fill']['doc'] }}
 

diff --git a/gnocchi/rest/aggregates/processor.py b/gnocchi/rest/aggregates/processor.py
@@ -157,14 +157,29 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None,
             return_inverse=True)
 
         # create nd-array (unique series x unique times) and fill
-        filler = (numpy.NaN if fill in [None, 'null', 'dropna']
+        filler = (numpy.NaN if fill in [None, 'null', 'dropna', 'ffill', 'bfill', 'full_ffill', 'full_bfill']
                   else fill)
         val_grid = numpy.full((len(series[sampling]), len(times)), filler)
         start = 0
         for i, split in enumerate(series[sampling]):
             size = len(split)
             val_grid[i][indices[start:start + size]] = split['values']
             start += size
+
+        if fill == "ffill":
+            val_grid = utils.forward_fill(val_grid)
+
+        if fill == "bfill":
+            val_grid = utils.backward_fill(val_grid)
+
+        if fill == "full_ffill":
+            val_grid = utils.forward_fill(val_grid)
+            val_grid = utils.backward_fill(val_grid)
+
+        if fill == "full_bfill":
+            val_grid = utils.backward_fill(val_grid)
+            val_grid = utils.forward_fill(val_grid)
+
         values = val_grid.T
 
         if fill is None:
@@ -200,7 +215,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None,
         output = {"aggregated": []}
         for sampling in sorted(result, reverse=True):
             granularity, times, values, references = result[sampling]
-            if fill == "dropna":
+            if fill in ("dropna", "ffill", "bfill", "full_ffill", "full_bfill"):
                 pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                         numpy.isinf(values[0]))
                 v = values[0][pos]
@@ -220,7 +235,7 @@ def aggregated(refs_and_timeseries, operations, from_timestamp=None,
         for sampling in sorted(result, reverse=True):
             granularity, times, values, references = result[sampling]
             for i, ref in enumerate(references):
-                if fill == "dropna":
+                if fill in ("dropna", "ffill", "bfill", "full_ffill", "full_bfill"):
                     pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                             numpy.isinf(values[i]))
                     v = values[i][pos]

diff --git a/gnocchi/rest/api.py b/gnocchi/rest/api.py
@@ -1892,8 +1892,9 @@ def groupper(r):
 
 
 FillSchema = voluptuous.Schema(
-    voluptuous.Any(voluptuous.Coerce(float), "null", "dropna",
-                   msg="Must be a float, 'dropna' or 'null'"))
+    voluptuous.Any(voluptuous.Coerce(float), "null", "dropna", "ffill", "bfill",
+                   "full_ffill", "full_bfill",
+                   msg="Must be a float, 'dropna', 'null', 'ffill', 'bfill', 'full_ffill' or 'full_bfill'")) # noqa
 
 
 def validate_qs(start=None, stop=None, granularity=None,

diff --git a/gnocchi/tests/functional/gabbits/aggregates-with-metric-ids.yaml b/gnocchi/tests/functional/gabbits/aggregates-with-metric-ids.yaml
@@ -803,7 +803,7 @@ tests:
         $.code: 400
         $.description.cause: "Argument value error"
         $.description.detail: "fill"
-        $.description.reason: "Must be a float, 'dropna' or 'null'"
+        $.description.reason: "Must be a float, 'dropna', 'null', 'ffill', 'bfill', 'full_ffill' or 'full_bfill'"
 
     - name: get rolling bad aggregate
       POST: /v1/aggregates

diff --git a/gnocchi/tests/functional/gabbits/aggregation.yaml b/gnocchi/tests/functional/gabbits/aggregation.yaml
@@ -39,6 +39,30 @@ tests:
         archive_policy_name: low
       status: 201
 
+    - name: create metric 3
+      POST: /v1/metric
+      request_headers:
+          content-type: application/json
+      data:
+        archive_policy_name: low
+      status: 201
+
+    - name: create metric 4
+      POST: /v1/metric
+      request_headers:
+          content-type: application/json
+      data:
+        archive_policy_name: low
+      status: 201
+
+    - name: create metric 5 (empty one)
+      POST: /v1/metric
+      request_headers:
+          content-type: application/json
+      data:
+        archive_policy_name: low
+      status: 201
+
     - name: get metric list
       GET: /v1/metric
 
@@ -66,6 +90,34 @@ tests:
             value: 5
       status: 202
 
+    - name: push measurements to metric 3
+      POST: /v1/metric/$HISTORY['get metric list'].$RESPONSE['$[2].id']/measures
+      request_headers:
+           content-type: application/json
+      data:
+          - timestamp: "2023-11-21T08:47:09"
+            value: 19.5
+          - timestamp: "2023-11-21T08:48:33"
+            value: 32
+          - timestamp: "2023-11-21T08:49:23"
+            value: 11.3
+      status: 202
+
+    - name: push measurements to metric 4
+      POST: /v1/metric/$HISTORY['get metric list'].$RESPONSE['$[3].id']/measures
+      request_headers:
+           content-type: application/json
+      data:
+          - timestamp: "2023-11-21T08:46:34"
+            value: 1.2
+          - timestamp: "2023-11-21T08:48:33"
+            value: 4.2
+          - timestamp: "2023-11-21T08:49:23"
+            value: 3
+          - timestamp: "2023-11-21T08:50:11"
+            value: 4.9
+      status: 202
+
     - name: get measure aggregates by granularity not float
       GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=foobar
       status: 400
@@ -74,14 +126,21 @@ tests:
       GET: /v1/aggregation/metric?metric=foobar
       status: 400
 
-    - name: GET measure aggregates by granularity with refresh
+    - name: GET measure aggregates by granularity with refresh for metric 1 and 2
       GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=1&refresh=true
       response_json_paths:
         $:
           - ['2015-03-06T14:33:57+00:00', 1.0, 23.1]
           - ['2015-03-06T14:34:12+00:00', 1.0, 7.0]
 
-    - name: POST measure aggregates by granularity with refresh
+    - name: GET measure aggregates by granularity with refresh for metric 3 and 4
+      GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[2].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[3].id']&granularity=1&refresh=true
+      response_json_paths:
+        $:
+          - ['2023-11-21T08:48:33+00:00', 1.0, 18.1]
+          - ['2023-11-21T08:49:23+00:00', 1.0, 7.15]
+
+    - name: POST measure aggregates by granularity with refresh for metric 1 and 2
       POST: /v1/aggregation/metric?granularity=1&refresh=true
       request_headers:
           content-type: application/json
@@ -166,6 +225,96 @@ tests:
           - ['2015-03-06T14:34:12+00:00', 1.0, 7.0]
           - ['2015-03-06T14:35:12+00:00', 1.0, 2.5]
 
+    - name: get measure aggregates difference with default fill
+      POST: /v1/aggregates?granularity=1
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated:
+          - ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
+          - ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
+
+    - name: get measure aggregates difference with fill "ffill"
+      POST: /v1/aggregates?granularity=1&fill=ffill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated:
+          - ['2023-11-21T08:47:09+00:00', 1.0, 18.3]
+          - ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
+          - ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
+          - ['2023-11-21T08:50:11+00:00', 1.0, 6.4]
+
+    - name: get measure aggregates difference with fill "bfill"
+      POST: /v1/aggregates?granularity=1&fill=bfill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated:
+          - ['2023-11-21T08:46:34+00:00', 1.0, 18.3]
+          - ['2023-11-21T08:47:09+00:00', 1.0, 15.3]
+          - ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
+          - ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
+
+    - name: get measure aggregates difference with fill "full_ffill"
+      POST: /v1/aggregates?granularity=1&fill=full_ffill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated:
+          - ['2023-11-21T08:46:34+00:00', 1.0, 18.3]
+          - ['2023-11-21T08:47:09+00:00', 1.0, 18.3]
+          - ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
+          - ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
+          - ['2023-11-21T08:50:11+00:00', 1.0, 6.4]
+
+    - name: get measure aggregates difference with fill "full_bfill"
+      POST: /v1/aggregates?granularity=1&fill=full_bfill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[3].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated:
+          - ['2023-11-21T08:46:34+00:00', 1.0, 18.3]
+          - ['2023-11-21T08:47:09+00:00', 1.0, 15.3]
+          - ['2023-11-21T08:48:33+00:00', 1.0, 27.8]
+          - ['2023-11-21T08:49:23+00:00', 1.0, 8.3]
+          - ['2023-11-21T08:50:11+00:00', 1.0, 6.4]
+
+    - name: get measure aggregates difference with default fill (one metric is empty)
+      POST: /v1/aggregates?granularity=1
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated: []
+
+    - name: get measure aggregates difference with fill "ffill" (one metric is empty)
+      POST: /v1/aggregates?granularity=1&fill=ffill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated: []
+
+    - name: get measure aggregates difference with fill "bfill" (one metric is empty)
+      POST: /v1/aggregates?granularity=1&fill=bfill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated: []
+
+    - name: get measure aggregates difference with fill "full_ffill" (one metric is empty)
+      POST: /v1/aggregates?granularity=1&fill=full_ffill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated: []
+
+    - name: get measure aggregates difference with fill "full_bfill" (one metric is empty)
+      POST: /v1/aggregates?granularity=1&fill=full_bfill
+      data:
+        operations: "(aggregate mean (- (metric $HISTORY['get metric list'].$RESPONSE['$[2].id'] mean) (metric $HISTORY['get metric list'].$RESPONSE['$[4].id'] mean)))"
+      response_json_paths:
+        $.measures.aggregated: []
+
     - name: get measure aggregates with bad fill
       GET: /v1/aggregation/metric?metric=$HISTORY['get metric list'].$RESPONSE['$[0].id']&metric=$HISTORY['get metric list'].$RESPONSE['$[1].id']&granularity=1&fill=asdf
       status: 400

diff --git a/gnocchi/utils.py b/gnocchi/utils.py
@@ -361,3 +361,14 @@ def is_resource_revision_needed(resource, request_attributes):
                       ".", resource, k, v, database_attribute)
             return True
     return False
+
+
+def forward_fill(arr):
+    """Replace each NaN with the closest preceding non-NaN value along axis 1.
+
+    NaNs that have no non-NaN value before them in their row are left as NaN.
+    The input array is not assigned to; the filled result is returned.
+
+    NOTE(review): `mask.shape[1]` and the row/column indexing on the return
+    line suggest a 2-D array is expected -- confirm against callers.
+    """
+    mask = numpy.isnan(arr)
+    # Column index where a value is present, 0 where the cell is NaN.
+    idx = numpy.where(~mask, numpy.arange(mask.shape[1]), 0)
+    # Running maximum along axis 1: each cell now holds the column of the
+    # latest non-NaN value at or before it (0 if none has been seen yet).
+    numpy.maximum.accumulate(idx, axis=1, out=idx)
+    # Gather per row; a leading NaN points at column 0, which is itself NaN.
+    return arr[numpy.arange(idx.shape[0])[:, None], idx]
+
+
+def backward_fill(arr):
+    """Replace each NaN with the closest following non-NaN value along axis 1.
+
+    Implemented by reversing axis 1, applying forward_fill and reversing the
+    result back, so NaNs with no non-NaN value after them are left as NaN.
+    """
+    return forward_fill(arr[:, ::-1])[:, ::-1]
diff --git a/releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml b/releasenotes/notes/more_fill_options-7f2f1048d7cb097a.yaml
@@ -0,0 +1,6 @@
+---
+features:
+  - |
+    Add `ffill`, `bfill`, `full_ffill` and `full_bfill` `fill` options, which
+    back- and/or forward-fill datapoints missing in one of the series in the
+    resulting set. See https://github.com/gnocchixyz/gnocchi/issues/1266.