Stricter TabulatedRatingCurve validation (#1469)

Fixes a part of #279. The four checks added here are: - At least two datapoints are needed. - The `flow_rate` must start at 0. - The `level` cannot be repeated. - The `flow_rate` cannot decrease with increasing `level`. And additionally we now ensure that flow rates are kept at 0 for levels below the minimum. ~~I should still check the docs, they may need some updating.~~ The other part of #279 is best done in a separate PR as it relates to the Basin / profile table. --------- Co-authored-by: Bart de Koning <[email protected]>
Deltares · May 21, 2024 · 224c460 · 224c460
1 parent f7762bb
commit 224c460
Show file tree

Hide file tree

Showing 8 changed files with 90 additions and 68 deletions.
diff --git a/core/src/read.jl b/core/src/read.jl
@@ -294,7 +294,11 @@ function TabulatedRatingCurve(
                 IterTools.groupby(row -> coalesce(row.control_state, nothing), static_id)
                 control_state = first(group).control_state
                 is_active = coalesce(first(group).active, true)
-                interpolation, is_valid = qh_interpolation(node_id, StructVector(group))
+                table = StructVector(group)
+                if !valid_tabulated_rating_curve(node_id, table)
+                    errors = true
+                end
+                interpolation = qh_interpolation(node_id, table)
                 if !ismissing(control_state)
                     control_mapping[(
                         NodeID(NodeType.TabulatedRatingCurve, node_id),
@@ -309,17 +313,16 @@ function TabulatedRatingCurve(
             # get the timestamp that applies to the model starttime
             idx_starttime = searchsortedlast(time.time, config.starttime)
             pre_table = view(time, 1:idx_starttime)
-            interpolation, is_valid = qh_interpolation(node_id, pre_table)
+            if !valid_tabulated_rating_curve(node_id, pre_table)
+                errors = true
+            end
+            interpolation = qh_interpolation(node_id, pre_table)
             push!(interpolations, interpolation)
             push!(active, true)
         else
             @error "$node_id data not in any table."
             errors = true
         end
-        if !is_valid
-            @error "A Q(h) relationship for $node_id from the $source table has repeated levels, this can not be interpolated."
-            errors = true
-        end
     end
 
     if errors

diff --git a/core/src/util.jl b/core/src/util.jl
@@ -243,25 +243,20 @@ function scalar_interpolation_derivative(
     end
 end
 
-function qh_interpolation(
-    level::AbstractVector,
-    flow_rate::AbstractVector,
-)::Tuple{ScalarInterpolation, Bool}
-    return LinearInterpolation(flow_rate, level; extrapolate = true), allunique(level)
-end
-
 """
 From a table with columns node_id, flow_rate (Q) and level (h),
 create a ScalarInterpolation from level to flow rate for a given node_id.
 """
-function qh_interpolation(
-    node_id::NodeID,
-    table::StructVector,
-)::Tuple{ScalarInterpolation, Bool}
-    nodetype = node_id.type
-    rowrange = findlastgroup(node_id, NodeID.(nodetype, table.node_id))
-    @assert !isempty(rowrange) "timeseries starts after model start time"
-    return qh_interpolation(table.level[rowrange], table.flow_rate[rowrange])
+function qh_interpolation(node_id::NodeID, table::StructVector)::ScalarInterpolation
+    rowrange = findlastgroup(node_id, NodeID.(node_id.type, table.node_id))
+    level = table.level[rowrange]
+    flow_rate = table.flow_rate[rowrange]
+
+    # Ensure that that Q stays 0 below the first level
+    pushfirst!(level, first(level) - 1)
+    pushfirst!(flow_rate, first(flow_rate))
+
+    return LinearInterpolation(flow_rate, level; extrapolate = true)
 end
 
 """

diff --git a/core/src/validation.jl b/core/src/validation.jl
@@ -99,6 +99,7 @@ sort_by_id(row) = row.node_id
 sort_by_time_id(row) = (row.time, row.node_id)
 sort_by_id_level(row) = (row.node_id, row.level)
 sort_by_id_state_level(row) = (row.node_id, row.control_state, row.level)
+sort_by_time_id_level(row) = (row.time, row.node_id, row.level)
 sort_by_priority(row) = (row.node_id, row.priority)
 sort_by_priority_time(row) = (row.node_id, row.priority, row.time)
 sort_by_subgrid_level(row) = (row.subgrid_id, row.basin_level)
@@ -109,6 +110,7 @@ sort_by_condition(row) = (row.node_id, row.compound_variable_id, row.greater_tha
 # get the right sort by function given the Schema, with sort_by_id as the default
 sort_by_function(table::StructVector{<:Legolas.AbstractRecord}) = sort_by_id
 sort_by_function(table::StructVector{TabulatedRatingCurveStaticV1}) = sort_by_id_state_level
+sort_by_function(table::StructVector{TabulatedRatingCurveTimeV1}) = sort_by_time_id_level
 sort_by_function(table::StructVector{BasinProfileV1}) = sort_by_id_level
 sort_by_function(table::StructVector{UserDemandStaticV1}) = sort_by_priority
 sort_by_function(table::StructVector{UserDemandTimeV1}) = sort_by_priority_time
@@ -122,10 +124,8 @@ const TimeSchemas = Union{
     FlowDemandTimeV1,
     LevelBoundaryTimeV1,
     PidControlTimeV1,
-    TabulatedRatingCurveTimeV1,
     UserDemandTimeV1,
 }
-
 function sort_by_function(table::StructVector{<:TimeSchemas})
     return sort_by_time_id
 end
@@ -400,6 +400,37 @@ function valid_demand(
     return !errors
 end
 
+function valid_tabulated_rating_curve(node_id::NodeID, table::StructVector)::Bool
+    errors = false
+
+    rowrange = findlastgroup(node_id, NodeID.(node_id.type, table.node_id))
+    level = table.level[rowrange]
+    flow_rate = table.flow_rate[rowrange]
+
+    n = length(level)
+    if n < 2
+        @error "At least two datapoints are needed." node_id n
+        errors = true
+    end
+    Q0 = first(flow_rate)
+    if Q0 != 0.0
+        @error "The `flow_rate` must start at 0." node_id flow_rate = Q0
+        errors = true
+    end
+
+    if !allunique(level)
+        @error "The `level` cannot be repeated." node_id
+        errors = true
+    end
+
+    if any(diff(flow_rate) .< 0.0)
+        @error "The `flow_rate` cannot decrease with increasing `level`." node_id
+        errors = true
+    end
+
+    return !errors
+end
+
 function incomplete_subnetwork(graph::MetaGraph, node_ids::Dict{Int32, Set{NodeID}})::Bool
     errors = false
     for (subnetwork_id, subnetwork_node_ids) in node_ids

diff --git a/core/test/control_test.jl b/core/test/control_test.jl
@@ -147,7 +147,7 @@ end
     t = Ribasim.datetime_since(discrete_control.record.time[2], model.config.starttime)
     @test Date(t) == Date("2020-03-16")
     # then the rating curve is updated to the "low" control_state
-    @test only(p.tabulated_rating_curve.tables).t[2] == 1.2
+    @test last(only(p.tabulated_rating_curve.tables).t) == 1.2
 end
 
 @testitem "Set PID target with DiscreteControl" begin

diff --git a/core/test/validation_test.jl b/core/test/validation_test.jl
@@ -2,6 +2,7 @@
     using Dictionaries: Indices
     using Ribasim: NodeID, valid_profiles, qh_interpolation, ScalarInterpolation
     using Logging
+    using StructArrays: StructVector
 
     node_id = Indices([NodeID(:Basin, 1)])
     level = [[0.0, 0.0, 1.0]]
@@ -24,11 +25,14 @@
     @test logger.logs[3].message ==
           "Basin #1 profile cannot have decreasing area at the top since extrapolating could lead to negative areas."
 
-    itp, valid = qh_interpolation([0.0, 0.0], [1.0, 2.0])
-    @test !valid
-    @test itp isa ScalarInterpolation
-    itp, valid = qh_interpolation([0.0, 0.1], [1.0, 2.0])
-    @test valid
+    table = StructVector(; flow_rate = [0.0, 0.1], level = [1.0, 2.0], node_id = [5, 5])
+    itp = qh_interpolation(NodeID(:TabulatedRatingCurve, 5), table)
+    # constant extrapolation at the bottom end, linear extrapolation at the top end
+    itp(0.0) ≈ 0.0
+    itp(1.0) ≈ 0.0
+    itp(1.5) ≈ 0.05
+    itp(2.0) ≈ 0.1
+    itp(3.0) ≈ 0.2
     @test itp isa ScalarInterpolation
 end
 
@@ -54,13 +58,14 @@ end
     end
     close(db)
 
-    @test length(logger.logs) == 2
+    @test length(logger.logs) == 3
     @test logger.logs[1].level == Error
-    @test logger.logs[1].message ==
-          "A Q(h) relationship for TabulatedRatingCurve #1 from the static table has repeated levels, this can not be interpolated."
+    @test logger.logs[1].message == "The `flow_rate` must start at 0."
     @test logger.logs[2].level == Error
-    @test logger.logs[2].message ==
-          "A Q(h) relationship for TabulatedRatingCurve #2 from the time table has repeated levels, this can not be interpolated."
+    @test logger.logs[2].message == "The `level` cannot be repeated."
+    @test logger.logs[3].level == Error
+    @test logger.logs[3].message ==
+          "The `flow_rate` cannot decrease with increasing `level`."
 end
 
 @testitem "Neighbor count validation" begin

diff --git a/docs/core/usage.qmd b/docs/core/usage.qmd
@@ -400,24 +400,29 @@ fraction      | Float64 | -            | in the interval [0,1]
 
 # TabulatedRatingCurve {#sec-tabulated-rating-curve}
 
-This table is similar in structure to the Basin profile. The TabulatedRatingCurve gives a
-relation between the storage of a connected Basin (via the outlet level) and its outflow.
+This table is similar in structure to the Basin profile.
+The TabulatedRatingCurve gives a relation between the level of a directly upstream Basin or LevelBoundary and its outflow.
 
 column        | type    | unit         | restriction
 ------------- | ------- | ------------ | -----------
 node_id       | Int32   | -            | sorted
 control_state | String  | -            | (optional) sorted per node_id
 active        | Bool    | -            | (optional, default true)
-level         | Float64 | $m$          | sorted per control_state
-flow_rate     | Float64 | $m^3 s^{-1}$ | non-negative
+level         | Float64 | $m$          | sorted per control_state, unique
+flow_rate     | Float64 | $m^3 s^{-1}$ | start at 0, increasing
+
+Thus a single rating curve can be given by the following table:
 
 node_id | flow_rate  | level
 ------- |----------- |-------
-      2 | 0.0        | -0.105
-      2 | 0.0        |  0.095
-      2 | 0.00942702 |  0.295
-      2 | 0.942702   | 20.095
-      3 | 0.0        |  2.129
+      2 | 0.0        | -0.10
+      2 | 0.0001     |  0.09
+      2 | 0.01       |  0.29
+      2 | 0.9        | 20.09
+
+Below the lowest given level of -0.10, the flow rate is kept at 0.
+Between given levels the flow rate is interpolated linearly.
+Above the maximum given level of 20.09, the flow rate keeps increases linearly according to the slope of the last segment.
 
 ## TabulatedRatingCurve / time
 

diff --git a/python/ribasim_testmodels/ribasim_testmodels/equations.py b/python/ribasim_testmodels/ribasim_testmodels/equations.py
@@ -67,7 +67,7 @@ def rating_curve_model() -> Model:
     )
 
     level_min = 1.0
-    level = np.linspace(0, 12, 100)
+    level = np.linspace(1, 12, 100)
     flow_rate = np.square(level - level_min) / (60 * 60 * 24)
     model.tabulated_rating_curve.add(
         Node(2, Point(1, 0)),

diff --git a/python/ribasim_testmodels/ribasim_testmodels/invalid.py b/python/ribasim_testmodels/ribasim_testmodels/invalid.py
@@ -1,6 +1,5 @@
 from typing import Any
 
-import pandas as pd
 from ribasim.config import Node, Solver
 from ribasim.input_base import TableModel
 from ribasim.model import Model
@@ -16,6 +15,12 @@
 
 
 def invalid_qh_model() -> Model:
+    """
+    Invalid TabulatedRatingCurve Q(h) table:
+    - levels must be unique
+    - flow_rate must start at 0
+    - flow_rate must not be decreasing
+    """
     model = Model(
         starttime="2020-01-01",
         endtime="2020-12-01",
@@ -24,29 +29,7 @@ def invalid_qh_model() -> Model:
 
     model.tabulated_rating_curve.add(
         Node(1, Point(0, 0)),
-        # Invalid: levels must not be repeated
-        [tabulated_rating_curve.Static(level=[0, 0], flow_rate=[1, 2])],
-    )
-    model.tabulated_rating_curve.add(
-        Node(2, Point(0, 1)),
-        [
-            tabulated_rating_curve.Time(
-                time=[
-                    pd.Timestamp("2020-01-01"),
-                    pd.Timestamp("2020-01-01"),
-                ],
-                # Invalid: levels must not be repeated
-                level=[0, 0],
-                flow_rate=[1, 2],
-            )
-        ],
-    )
-    model.basin.add(
-        Node(3, Point(0, 2)),
-        [
-            basin.State(level=[1.4112729908597084]),
-            basin.Profile(area=[0.01, 1], level=[0, 1]),
-        ],
+        [tabulated_rating_curve.Static(level=[0, 0, 1], flow_rate=[1, 2, 1.5])],
     )
 
     return model