diff --git a/Project.toml b/Project.toml index a3cb3d5..ee3c16d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PowerAnalytics" uuid = "56ce1300-00bc-47e4-ba8c-b166ccc19f51" -authors = ["cbarrows "] -version = "0.6.1" +authors = ["Gabriel Konar-Steenberg ", "cbarrows "] +version = "0.7.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" @@ -11,13 +11,14 @@ InfrastructureSystems = "2cd47ed4-ca9b-11e9-27f2-ab636a7671f1" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" PowerSimulations = "e690365d-45e2-57bb-ac84-44ba829e73c4" PowerSystems = "bcd98974-b02a-5e2f-9ee0-a103f5c450dd" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" TimeSeries = "9e3dc215-6440-5c97-bce1-76c03772f85e" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] -Dates = "1" DataFrames = "1" DataStructures = "0.18" +Dates = "1" InfrastructureSystems = "1" InteractiveUtils = "1" PowerSimulations = "^0.27.1" diff --git a/src/PowerAnalytics.jl b/src/PowerAnalytics.jl index 3090104..884a2f9 100644 --- a/src/PowerAnalytics.jl +++ b/src/PowerAnalytics.jl @@ -7,13 +7,46 @@ export get_service_data export categorize_data export no_datetime +export ComponentSelector, ComponentSelectorElement, ComponentSelectorSet +export select_components, get_name, get_subselectors +export Metric, TimedMetric, TimelessMetric, ComponentSelectorTimedMetric, + ComponentTimedMetric, + SystemTimedMetric, ResultsTimelessMetric, CustomTimedMetric +export DATETIME_COL, META_COL_KEY, SYSTEM_COL, RESULTS_COL, AGG_META_KEY +export is_col_meta, set_col_meta, set_col_meta!, time_df, time_vec, data_cols, data_df, + data_vec, data_mat, get_description, get_component_agg_fn, get_time_agg_fn, + with_component_agg_fn, with_time_agg_fn, metric_selector_to_string, get_agg_meta, + set_agg_meta! 
+export compute, compute_set, compute_all, hcat_timed, aggregate_time, compose_metrics +export create_problem_results_dict +export load_component_selector, storage_component_selector, generator_selectors_by_fuel +export calc_active_power, calc_production_cost, calc_startup_cost, calc_shutdown_cost, + calc_discharge_cycles, calc_system_slack_up, calc_load_forecast, calc_active_power_in, + calc_active_power_out, calc_stored_energy, calc_active_power_forecast, calc_curtailment, + calc_sum_objective_value, calc_sum_solve_time, calc_sum_bytes_alloc, calc_total_cost, + make_calc_is_slack_up, calc_is_slack_up, calc_system_load_forecast, + calc_net_load_forecast, calc_curtailment_frac, calc_load_from_storage, + calc_system_load_from_storage, calc_integration, calc_capacity_factor +export mean, weighted_mean, unweighted_sum + #I/O Imports import Dates import TimeSeries +import Statistics +import Statistics: mean import DataFrames +import DataFrames: DataFrame, metadata, metadata!, colmetadata, colmetadata! 
import YAML import DataStructures: OrderedDict, SortedDict import PowerSystems +import PowerSystems: + Component, + ComponentSelector, ComponentSelectorElement, ComponentSelectorSet, + select_components, get_name, get_subselectors, + get_component, get_components, + get_available, + NAME_DELIMETER + import InfrastructureSystems import PowerSimulations import InteractiveUtils @@ -26,6 +59,11 @@ include("definitions.jl") include("get_data.jl") include("fuel_results.jl") +include("metrics.jl") +include("input.jl") +include("builtin_component_selectors.jl") +include("builtin_metrics.jl") + greet() = print("Hello World!") end # module PowerAnalytics diff --git a/src/builtin_component_selectors.jl b/src/builtin_component_selectors.jl new file mode 100644 index 0000000..278bae5 --- /dev/null +++ b/src/builtin_component_selectors.jl @@ -0,0 +1,62 @@ +"A ComponentSelector representing all the electric load in a System" +load_component_selector = select_components(PSY.ElectricLoad) + +"A ComponentSelector representing all the storage in a System" +storage_component_selector = select_components(PSY.Storage) + +FUEL_TYPES_DATA_FILE = + joinpath(dirname(dirname(pathof(PowerAnalytics))), "deps", "generator_mapping.yaml") + +# Parse the strings in generator_mapping.yaml into types and enum items +function parse_fuel_category(category_spec::Dict) + # Use reflection to look up the type corresponding the generator type string (this isn't a security risk, is it?) 
+ gen_type = getproperty(PowerSystems, Symbol(get(category_spec, "gentype", Component))) + (gen_type === Any) && (gen_type = Component) + @assert gen_type <: Component + + pm = get(category_spec, "primemover", nothing) + isnothing(pm) || (pm = PowerSystems.parse_enum_mapping(PowerSystems.PrimeMovers, pm)) + + fc = get(category_spec, "fuel", nothing) + isnothing(fc) || (fc = PowerSystems.parse_enum_mapping(PowerSystems.ThermalFuels, fc)) + + return gen_type, pm, fc +end + +function make_fuel_component_selector(category_spec::Dict) + parse_results = parse_fuel_category(category_spec) + (gen_type, prime_mover, fuel_category) = parse_results + + function filter_closure(comp::Component) + comp_sig = Tuple{typeof(comp)} + if !isnothing(prime_mover) + hasmethod(PowerSystems.get_prime_mover_type, comp_sig) || return false + (PowerSystems.get_prime_mover_type(comp) == prime_mover) || return false + end + if !isnothing(fuel_category) + hasmethod(PowerSystems.get_fuel, comp_sig) || return false + (PowerSystems.get_fuel(comp) == fuel_category) || return false + end + return true + end + + # Create a nice name that is guaranteed to never collide with fully-qualified component names + selector_name = join(ifelse.(isnothing.(parse_results), "", string.(parse_results)), + NAME_DELIMETER) + + return select_components(filter_closure, gen_type, selector_name) +end + +# Based on old PowerAnalytics' get_generator_mapping +function load_generator_fuel_mappings(filename = FUEL_TYPES_DATA_FILE) + in_data = open(YAML.load, filename) + mappings = OrderedDict{String, ComponentSelector}() + for top_level in in_data |> keys |> collect |> sort + subselectors = make_fuel_component_selector.(in_data[top_level]) + mappings[top_level] = select_components(subselectors...; name = top_level) + end + return mappings +end + +"A dictionary of nested `ComponentSelector`s representing all the generators in a System categorized by fuel type" +generator_selectors_by_fuel = load_generator_fuel_mappings() diff 
--git a/src/builtin_metrics.jl b/src/builtin_metrics.jl new file mode 100644 index 0000000..c8ff809 --- /dev/null +++ b/src/builtin_metrics.jl @@ -0,0 +1,429 @@ +# TODO: put make_key in PowerSimulations and refactor existing code to use it +# TODO test + +"The various key entry types that can work with a System" +const SystemEntryType = Union{ + PSI.VariableType, + PSI.ExpressionType, +} + +"The various key entry types that can be used to make a PSI.OptimizationContainerKey" +const EntryType = Union{ + SystemEntryType, + PSI.ParameterType, + PSI.AuxVariableType, + PSI.InitialConditionType, +} + +"Create a PSI.OptimizationContainerKey from the given key entry type and component. + +# Arguments + - `entry::Type{<:EntryType}`: the key entry + - `component` (`::Type{<:Union{Component, PSY.System}}` or `::Type{<:Component}` depending + on the key type): the component type +" +function make_key end +make_key(entry::Type{<:PSI.VariableType}, component::Type{<:Union{Component, PSY.System}}) = + PSI.VariableKey(entry, component) +make_key(entry::Type{<:PSI.ExpressionType}, comp::Type{<:Union{Component, PSY.System}}) = + PSI.ExpressionKey(entry, comp) +make_key(entry::Type{<:PSI.ParameterType}, component::Type{<:Component}) = + PSI.ParameterKey(entry, component) +make_key(entry::Type{<:PSI.AuxVariableType}, component::Type{<:Component}) = + PSI.AuxVarKey(entry, component) +make_key(entry::Type{<:PSI.InitialConditionType}, component::Type{<:Component}) = + PSI.ICKey(entry, component) + +"Sort a vector of key tuples into variables, parameters, etc. like PSI.load_results! 
wants" +make_entry_kwargs(key_tuples::Vector{<:Tuple}) = [ + (key_name => filter(((this_key, _),) -> this_key <: key_type, key_tuples)) + for (key_name, key_type) in [ + (:variables, PSI.VariableType), + (:duals, PSI.ConstraintType), + (:parameters, PSI.ParameterType), + (:aux_variables, PSI.AuxVariableType), + (:expressions, PSI.ExpressionType), + ] +] + +# TODO test +"Given an EntryType and a Component, fetch a single column of results" +function read_component_result(res::IS.Results, entry::Type{<:EntryType}, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) + cache_len = isnothing(len) ? length(PSI.get_timestamps(res)) : len + key_pair = (entry, typeof(comp)) + PSI.load_results!( + res, + cache_len; + initial_time = start_time, + make_entry_kwargs([key_pair])..., + ) + key = make_key(key_pair...) + res = try + first( + values( + PSI.read_results_with_keys( + res, + [key]; + start_time = start_time, + len = len, + cols = [get_name(comp)], + ), + ), + ) + catch e + if e isa KeyError && e.key == get_name(comp) + throw( + NoResultError( + "$(get_name(comp)) not in the results for $(PSI.encode_key_as_string(key))", + ), + ) + else + rethrow(e) + end + end + return res[!, [DATETIME_COL, get_name(comp)]] +end + +# TODO caching here too +"Given an EntryType that applies to the System, fetch a single column of results" +function read_system_result(res::IS.Results, entry::Type{<:SystemEntryType}, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) + key = make_key(entry, PSY.System) + res = first( + values(PSI.read_results_with_keys( + res, + [key]; + start_time = start_time, + len = len, + )), + ) + @assert size(res, 2) == 2 "Expected a time column and a data column in the results for $(PSI.encode_key_as_string(key)), got $(size(res, 2)) columns" + @assert DATETIME_COL in names(res) "Expected a column named $DATETIME_COL in the results for $(PSI.encode_key_as_string(key)), got $(names(res))" + # Whatever the 
non-time column is, rename it to something standard + res = DataFrames.rename(res, findfirst(!=(DATETIME_COL), names(res)) => SYSTEM_COL) + return res[!, [DATETIME_COL, SYSTEM_COL]] +end + +# TODO test +"Convenience function to convert an EntryType to a function and make a ComponentTimedMetric from it" +make_component_metric_from_entry( + name::String, + description::String, + key::Type{<:EntryType}, +) = + ComponentTimedMetric(name, description, + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> + read_component_result(res, key, comp, start_time, len)) + +# TODO test +"Convenience function to convert a SystemEntryType to a function and make a SystemTimedMetric from it" +make_system_metric_from_entry( + name::String, + description::String, + key::Type{<:SystemEntryType}, +) = + SystemTimedMetric(name, description, + (res::IS.Results, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> + read_system_result(res, key, start_time, len)) + +"Compute the mean of `values` weighted by the corresponding entries of `weights`." +function weighted_mean(values, weights) # Made quite finnicky by the need to broadcast in two dimensions (or am I just new to this?) + new_values = @. 
broadcast(ifelse, (.==)(weights, 0), 0.0, values) # Allow 0 weight to cancel out NaN value
+    return sum((.*).(new_values, weights)) ./ sum(weights)
+end
+
+weighted_mean(empty) =
+    if length(empty) == 0 || all(.!(empty .== empty))  # Zero length or all NaNs
+        weighted_mean(empty, empty)
+    else
+        throw(
+            ArgumentError(
+                "weighted_mean needs two arguments unless the first argument has zero length or is all NaN",
+            ),
+        )
+    end
+
+"A version of `sum` that ignores a second argument, for use where aggregation metadata is at play"
+unweighted_sum(x) = sum(x)
+unweighted_sum(x, y) = sum(x)
+
+# BEGIN BUILT-IN METRICS DEFINITIONS
+# TODO perhaps these built-in metrics should be in some sort of container
+
+# NOTE ActivePowerVariable is in units of megawatts per simulation time period, so it's
+# actually energy and it makes sense to sum it up.
+calc_active_power = make_component_metric_from_entry(
+    "ActivePower",
+    "Calculate the active power of the specified ComponentSelector",
+    PSI.ActivePowerVariable,
+)
+
+calc_production_cost = make_component_metric_from_entry(
+    "ProductionCost",
+    "Calculate the production cost of the specified ComponentSelector",
+    PSI.ProductionCostExpression,
+)
+
+calc_active_power_in = make_component_metric_from_entry(
+    "ActivePowerIn",
+    "Calculate the active power input to the specified (storage) ComponentSelector",
+    PSI.ActivePowerInVariable,
+)
+
+calc_active_power_out = make_component_metric_from_entry(
+    "ActivePowerOut",
+    "Calculate the active power output of the specified (storage) ComponentSelector",
+    PSI.ActivePowerOutVariable,
+)
+
+calc_stored_energy = make_component_metric_from_entry(
+    "StoredEnergy",
+    "Calculate the energy stored in the specified (storage) ComponentSelector",
+    PSI.EnergyVariable,
+)
+
+calc_load_from_storage = compose_metrics(
+    "LoadFromStorage",
+    "Calculate the ActivePowerIn minus the ActivePowerOut of the specified (storage) ComponentSelector",
+    (-),
+    calc_active_power_in,
calc_active_power_out) + +calc_active_power_forecast = make_component_metric_from_entry( + "ActivePowerForecast", + "Fetch the forecast active power of the specified ComponentSelector", + PSI.ActivePowerTimeSeriesParameter, +) + +calc_load_forecast = ComponentTimedMetric( + "LoadForecast", + "Fetch the forecast active load of the specified ComponentSelector", + # Load is negative power + # NOTE if we had our own time-indexed dataframe type we could overload multiplication with a scalar and simplify this + (args...) -> let + val = compute(calc_active_power_forecast, args...) + data_vec(val) .*= -1 + return val + end, +) + +calc_system_load_forecast = SystemTimedMetric( + "SystemLoadForecast", + "Fetch the forecast active load of all the ElectricLoad Components in the system", + (res::IS.Results, st::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> + compute(calc_load_forecast, res, load_component_selector, st, len), +) + +calc_system_load_from_storage = let + SystemTimedMetric( + "SystemLoadFromStorage", + "Fetch the LoadFromStorage of all storage in the system", + (res::IS.Results, st::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> + compute(calc_load_from_storage, res, storage_component_selector, st, len), + ) +end + +calc_net_load_forecast = compose_metrics( + "NetLoadForecast", + "SystemLoadForecast minus ActivePowerForecast of the given ComponentSelector", + # (intentionally done with forecast to inform how storage should be used, among other reasons) + (-), + calc_system_load_forecast, calc_active_power_forecast) + +calc_curtailment = compose_metrics( + "Curtailment", + "Calculate the ActivePowerForecast minus the ActivePower of the given ComponentSelector", + (-), + calc_active_power_forecast, calc_active_power, +) + +calc_curtailment_frac = ComponentTimedMetric( + "CurtailmentFrac", + "Calculate the Curtailment as a fraction of the ActivePowerForecast of the given ComponentSelector", + ( + (res::IS.Results, comp::Component, + 
start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}, + ) -> let + result = compute(calc_curtailment, res, comp, start_time, len) + power = collect( + data_vec( + compute(calc_active_power_forecast, res, comp, start_time, len), + ), + ) + data_vec(result) ./= power + set_agg_meta!(result, power) + return result + end + ); component_agg_fn = weighted_mean, time_agg_fn = weighted_mean, +) + +# Helper function for calc_integration +_integration_denoms(res, start_time, len) = + compute(calc_system_load_forecast, res, start_time, len), + compute(calc_system_load_from_storage, res, start_time, len) + +calc_integration = ComponentTimedMetric( + "Integration", + "Calculate the ActivePower of the given ComponentSelector over the sum of the SystemLoadForecast and the SystemLoadFromStorage", + ( + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}, + ) -> let + result = compute(calc_active_power, res, comp, start_time, len) + # TODO does not check date alignment, maybe use hcat_timed + denom = (.+)( + (_integration_denoms(res, start_time, len) .|> data_vec .|> collect)..., + ) + data_vec(result) ./= denom + set_agg_meta!(result, denom) + return result + end + ); component_agg_fn = unweighted_sum, time_agg_fn = weighted_mean, + component_meta_agg_fn = mean, + # We use a custom eval_zero to put the weight in there even when there are no components + eval_zero = (res::IS.Results, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}, + ) -> let + denoms = _integration_denoms(res, start_time, len) + # TODO does not check date alignment, maybe use hcat_timed + time_col = time_vec(first(denoms)) + data_col = repeat([0.0], length(time_col)) + result = DataFrame(DATETIME_COL => time_col, "" => data_col) + set_agg_meta!(result, (.+)((denoms .|> data_vec .|> collect)...)) + end, +) + +calc_capacity_factor = ComponentTimedMetric( + "CapacityFactor", + "Calculate the capacity factor (actual 
production/rated production) of the specified ComponentSelector", + # (intentionally done with forecast to serve as sanity check -- solar capacity factor shouldn't exceed 20%, etc.) + ( + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}, + ) -> let + result = compute(calc_active_power_forecast, res, comp, start_time, len) + rating = PSY.get_rating(comp) + data_vec(result) ./= rating + set_agg_meta!(result, repeat([rating], length(data_vec(result)))) + return result + end + ); component_agg_fn = weighted_mean, time_agg_fn = weighted_mean, +) + +calc_startup_cost = ComponentTimedMetric( + "StartupCost", + "Calculate the startup cost of the specified ComponentSelector", + ( + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> let + val = read_component_result(res, PSI.StartVariable, comp, start_time, len) + start_cost = PSY.get_start_up(PSY.get_operation_cost(comp)) + data_vec(val) .*= start_cost + return val + end + ), +) + +calc_shutdown_cost = ComponentTimedMetric( + "ShutdownCost", + "Calculate the shutdown cost of the specified ComponentSelector", + ( + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> let + val = read_component_result(res, PSI.StartVariable, comp, start_time, len) + stop_cost = PSY.get_shut_down(PSY.get_operation_cost(comp)) + data_vec(val) .*= stop_cost + return val + end + ), +) + +calc_total_cost = ComponentTimedMetric( + "TotalCost", + "Calculate the production+startup+shutdown cost of the specified ComponentSelector; startup and shutdown costs are assumed to be zero if undefined", + (args...) -> let + production = compute(calc_production_cost, args...) 
+        startup = try
+            data_vec(compute(calc_startup_cost, args...))
+        catch
+            repeat([0.0], size(production, 1))
+        end
+        shutdown = try
+            data_vec(compute(calc_shutdown_cost, args...))
+        catch
+            repeat([0.0], size(production, 1))
+        end
+        # NOTE if I ever make my own type for timed dataframes, should do custom setindex! to make this less painful
+        production[!, first(data_cols(production))] += startup + shutdown
+        return production
+    end,
+)
+
+calc_discharge_cycles = ComponentTimedMetric(
+    "DischargeCycles",
+    "Calculate the number of discharge cycles a storage device has gone through in the time period",
+    # NOTE: here, we define one "cycle" as a discharge from the maximum state of charge to
+    # the minimum state of charge. A simpler algorithm might define a cycle as a discharge
+    # from the maximum state of charge to zero; the algorithm given here is more rigorous.
+    (
+        (res::IS.Results, comp::Component,
+            start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> let
+            val = read_component_result(res, PSI.ActivePowerOutVariable, comp, start_time, len)
+            soc_limits = PSY.get_state_of_charge_limits(comp)
+            soc_range = soc_limits.max - soc_limits.min
+            data_vec(val) ./= soc_range
+            return val
+        end
+    ),
+)
+
+calc_system_slack_up = make_system_metric_from_entry(
+    "SystemSlackUp",
+    "Calculate the system balance slack up",
+    PSI.SystemBalanceSlackUp,
+)
+
+"""
+Create a boolean Metric for whether the given time period has system balance slack up of
+magnitude greater than the `threshold` argument
+"""
+make_calc_is_slack_up(threshold::Real) = SystemTimedMetric(
+    "IsSlackUp($threshold)",
+    "Calculate whether the given time period has system balance slack up of magnitude greater than $threshold",
+    (args...) -> let
+        val = compute(calc_system_slack_up, args...)
+ val[!, first(data_cols(val))] = + abs.(val[!, first(data_cols(val))]) .> threshold + return val + end, +) + +# TODO is this the appropriate place to put a default threshold (and is it the appropriate default)? +calc_is_slack_up = make_calc_is_slack_up(1e-3) + +# TODO caching here too +make_results_metric_from_sum_optimizer_stat( + name::String, + description::String, + stats_key::String) = ResultsTimelessMetric( + name, + description, + (res::IS.Results) -> sum(PSI.read_optimizer_stats(res)[!, stats_key]), +) + +calc_sum_objective_value = make_results_metric_from_sum_optimizer_stat( + "SumObjectiveValue", + "Sum the objective values achieved in the optimization problems", + "objective_value") + +calc_sum_solve_time = make_results_metric_from_sum_optimizer_stat( + "SumSolveTime", + "Sum the solve times taken by the optimization problems", + "solve_time") + +calc_sum_bytes_alloc = make_results_metric_from_sum_optimizer_stat( + "SumBytesAlloc", + "Sum the bytes allocated to the optimization problems", + "solve_bytes_alloc") diff --git a/src/input.jl b/src/input.jl new file mode 100644 index 0000000..4b951cf --- /dev/null +++ b/src/input.jl @@ -0,0 +1,31 @@ + +""" +Accept a directory that contains several results subdirectories (that each contain +`results`, `problems`, etc. sub-subdirectories) and construct a sorted dictionary from +`String` to `SimulationProblemResults` where the keys are the subdirectory names and the +values are loaded results datasets. 
+ +# Arguments + - `results_dir::AbstractString`: the directory where results subdirectories can be found + - `problem::String`: the name of the problem to load (e.g., "UC", "ED") + - `scenarios::Union{Vector{AbstractString}, Nothing} = nothing`: a list of scenario + subdirectories to load, or `nothing` to load all the subdirectories + - `kwargs...`: keyword arguments to pass through to + [`PSI.get_decision_problem_results`](@ref) +""" +function create_problem_results_dict( + results_dir::AbstractString, + problem::String, + scenarios::Union{Vector{<:AbstractString}, Nothing} = nothing; + populate_system::Bool = false, + kwargs..., +) + if scenarios === nothing + scenarios = filter(x -> isdir(joinpath(results_dir, x)), readdir(results_dir)) + end + return SortedDict( + scenario => PSI.get_decision_problem_results( + PSI.SimulationResults(joinpath(results_dir, scenario)), + problem; populate_system = populate_system, kwargs...) for scenario in scenarios + ) +end diff --git a/src/metrics.jl b/src/metrics.jl new file mode 100644 index 0000000..e3308d6 --- /dev/null +++ b/src/metrics.jl @@ -0,0 +1,859 @@ +# TODO there is probably a more principled way of structuring this Metric type hierarchy -- maybe parameterization? +"The basic type for all `Metrics`." +abstract type Metric end + +"Time series `Metrics`." +abstract type TimedMetric <: Metric end + +"Scalar-in-time `Metrics`." +abstract type TimelessMetric <: Metric end + +"Time series `Metrics` defined on `ComponentSelector`s." +abstract type ComponentSelectorTimedMetric <: TimedMetric end + +""" +`ComponentSelectorTimedMetrics` implemented by evaluating a function on each `Component`. 
+ +# Arguments + - `name::String`: the name of the `Metric` + - `description::String`: a description of the `Metric` + - `eval_fn`: a callable with signature + `(::IS.Results, ::Component, ::Union{Nothing, Dates.DateTime}, ::Union{Int, Nothing})` + that returns a DataFrame representing the results for that `Component` + - `component_agg_fn`: optional, a callable to aggregate results between + `Component`s/`ComponentSelector`s, defaults to `sum` + - `time_agg_fn`: optional, a callable to aggregate results across time, defaults to `sum` + +""" +struct ComponentTimedMetric <: ComponentSelectorTimedMetric + name::String + description::String + eval_fn::Any + component_agg_fn::Any + time_agg_fn::Any + component_meta_agg_fn::Any + time_meta_agg_fn::Any + eval_zero::Any +end + +# TODO test component_meta_agg_fn, time_meta_agg_fn, eval_zero if keeping them +ComponentTimedMetric( + name::String, + description::String, + eval_fn::Function; + component_agg_fn = sum, + time_agg_fn = sum, + component_meta_agg_fn = sum, + time_meta_agg_fn = sum, + eval_zero = nothing, +) = ComponentTimedMetric( + name, + description, + eval_fn, + component_agg_fn, + time_agg_fn, + component_meta_agg_fn, + time_meta_agg_fn, + eval_zero, +) + +# TODO test CustomTimedMetric +""" +`ComponentSelectorTimedMetrics` implemented without drilling down to the base `Component`s, +just call the `eval_fn` directly. 
+ +# Arguments + - `name::String`: the name of the `Metric` + - `description::String`: a description of the `Metric` + - `eval_fn`: a callable with signature `(::IS.Results, ::Union{ComponentSelector, + Component}, ::Union{Nothing, Dates.DateTime}, ::Union{Int, Nothing})` that returns a + DataFrame representing the results for that `Component` + - `time_agg_fn`: optional, a callable to aggregate results across time, defaults to `sum` +""" +struct CustomTimedMetric <: ComponentSelectorTimedMetric + name::String + description::String + eval_fn::Any + time_agg_fn::Any + time_meta_agg_fn::Any +end + +CustomTimedMetric( + name::String, + description::String, + eval_fn::Function; + time_agg_fn = sum, + time_meta_agg_fn = sum, +) = + CustomTimedMetric(name, description, eval_fn, time_agg_fn, time_meta_agg_fn) + +""" +Time series `Metrics` defined on `Systems`. + +# Arguments + - `name::String`: the name of the `Metric` + - `description::String`: a description of the `Metric` + - `eval_fn`: a callable with signature + `(::IS.Results, ::Union{Nothing, Dates.DateTime}, ::Union{Int, Nothing})` that returns a + DataFrame representing the results + - `time_agg_fn`: optional, a callable to aggregate results across time, defaults to `sum` +""" +struct SystemTimedMetric <: TimedMetric + name::String + description::String + eval_fn::Any + time_agg_fn::Any + time_meta_agg_fn::Any +end + +SystemTimedMetric( + name::String, + description::String, + eval_fn::Function; + time_agg_fn = sum, + time_meta_agg_fn = sum, +) = + SystemTimedMetric(name, description, eval_fn, time_agg_fn, time_meta_agg_fn) + +""" +Timeless Metrics with a single value per `IS.Results` instance + +# Arguments + - `name::String`: the name of the `Metric` + - `description::String`: a description of the `Metric` + - `eval_fn`: a callable with signature `(::IS.Results,)` that returns a `DataFrame` + representing the results +""" +struct ResultsTimelessMetric <: TimelessMetric + name::String + description::String + 
eval_fn::Function +end + +""" +The metric does not have a result for the `Component`/`ComponentSelector`/etc. on which it +is being called. +""" +struct NoResultError <: Exception + msg::AbstractString +end + +# TODO remove :DateTime hardcoding in PowerSimulations +"Name of the column that represents the time axis in computed DataFrames" +const DATETIME_COL::String = "DateTime" + +""" +Column metadata key whose value signifies whether the column is metadata. Metadata columns +are excluded from `data_cols` and similar and can be used to represent things like a time +aggregation. +""" +const META_COL_KEY::String = "meta_col" + +"Name of a column that represents whole-of-`System` data" +const SYSTEM_COL::String = "System" + +"Name of a column that represents whole-of-`Results` data" +const RESULTS_COL::String = "Results" + +""" +Column metadata key whose value, if any, is additional information to be passed to +aggregation functions. Values of `nothing` are equivalent to absence of the entry. +""" +const AGG_META_KEY::String = "agg_meta" + +# Override these if you define Metric subtypes with different implementations +get_name(m::Metric) = m.name +get_description(m::Metric) = m.description +get_time_agg_fn(m::TimedMetric) = m.time_agg_fn +# TODO is there a naming convention for this kind of function? 
+"Returns a `Metric` identical to the input except with the given `time_agg_fn`" +with_time_agg_fn(m::T, time_agg_fn) where {T <: ComponentSelectorTimedMetric} = + T(m.name, m.description, m.eval_fn; time_agg_fn = time_agg_fn) + +get_component_agg_fn(m::ComponentTimedMetric) = m.component_agg_fn +"Returns a `Metric` identical to the input except with the given `component_agg_fn`" +with_component_agg_fn(m::ComponentTimedMetric, component_agg_fn) = + ComponentTimedMetric( + m.name, + m.description, + m.eval_fn; + component_agg_fn = component_agg_fn, + ) + +get_time_meta_agg_fn(m::TimedMetric) = m.time_meta_agg_fn +"Returns a `Metric` identical to the input except with the given `time_meta_agg_fn`" +with_time_meta_agg_fn(m::T, time_meta_agg_fn) where {T <: ComponentSelectorTimedMetric} = + T(m.name, m.description, m.eval_fn; time_meta_agg_fn = time_meta_agg_fn) + +get_component_meta_agg_fn(m::ComponentTimedMetric) = m.component_meta_agg_fn +"Returns a `Metric` identical to the input except with the given `component_meta_agg_fn`" +with_component_meta_agg_fn(m::ComponentTimedMetric, component_meta_agg_fn) = + ComponentTimedMetric( + m.name, + m.description, + m.eval_fn; + component_meta_agg_fn = component_meta_agg_fn, + ) + +""" +Canonical way to represent a `(Metric, ComponentSelector)` or `(Metric, Component)` pair as +a string. +""" +metric_selector_to_string(m::Metric, e::Union{ComponentSelector, Component}) = + get_name(m) * NAME_DELIMETER * get_name(e) + +"Check whether a column is metadata" +is_col_meta(df, colname) = get(colmetadata(df, colname), META_COL_KEY, false) + +"Mark a column as metadata" +set_col_meta!(df, colname, val = true) = + colmetadata!(df, colname, META_COL_KEY, val; style = :note) + +"Get the column's aggregation metadata; return `nothing` if there is none." +get_agg_meta(df, colname) = get(colmetadata(df, colname), AGG_META_KEY, nothing) + +"Get the single data column's aggregation metadata; error on multiple data columns." 
+function get_agg_meta(df)
+    my_data_cols = data_cols(df)
+    (length(my_data_cols) == 1) && return get_agg_meta(df, first(my_data_cols))
+    throw(
+        ArgumentError(
+            "DataFrame has $(length(my_data_cols)) columns of data, must specify a column name",
+        ),
+    )
+end
+
+"Set the column's aggregation metadata."
+set_agg_meta!(df, colname, val) =
+    colmetadata!(df, colname, AGG_META_KEY, val; style = :note)
+
+"Set the single data column's aggregation metadata; error on multiple data columns."
+function set_agg_meta!(df, val)
+    my_data_cols = data_cols(df)
+    (length(my_data_cols) == 1) && return set_agg_meta!(df, first(my_data_cols), val)
+    throw(
+        ArgumentError(
+            "DataFrame has $(length(my_data_cols)) columns of data, must specify a column name",
+        ),
+    )
+end
+
+# TODO test that mutating the selection mutates the original
+"Select the `DateTime` column of the `DataFrame` as a one-column `DataFrame` without copying."
+time_df(df::DataFrames.AbstractDataFrame) =
+    DataFrames.select(df, DATETIME_COL; copycols = false)
+
+"Select the `DateTime` column of the `DataFrame` as a `Vector` without copying."
+time_vec(df::DataFrames.AbstractDataFrame) = df[!, DATETIME_COL]
+
+"""
+Select the names of the data columns of the `DataFrame`, i.e., those that are not `DateTime`
+and not metadata.
+"""
+data_cols(df::DataFrames.AbstractDataFrame) =
+    filter(
+        (
+            colname ->
+                (colname != DATETIME_COL) &&
+                !is_col_meta(df, colname)
+        ),
+        names(df))
+
+"Select the data columns of the `DataFrame` as a `DataFrame` without copying."
+data_df(df::DataFrames.AbstractDataFrame) =
+    DataFrames.select(df, data_cols(df); copycols = false)
+
+"Select the data column of the `DataFrame` as a vector without copying, errors if more than one."
+function data_vec(df::DataFrames.AbstractDataFrame) + the_data = data_df(df) + (size(the_data, 2) > 1) && throw( + ArgumentError( + "DataFrame has $(size(the_data, 2)) columns of data, consider using data_mat", + ), + ) + return the_data[!, 1] +end + +"Select the data columns of the `DataFrame` as a `Matrix` with copying." +data_mat(df::DataFrames.AbstractDataFrame) = Matrix(data_df(df)) + +# Validation and metadata management helper function for various compute methods +function _compute_meta_timed!(val, metric, results) + (DATETIME_COL in names(val)) || throw(ArgumentError( + "Result metric.eval_fn did not include a $DATETIME_COL column")) + set_col_meta!(val, DATETIME_COL) + _compute_meta_generic!(val, metric, results) +end + +function _compute_meta_generic!(val, metric, results) + metadata!(val, "title", get_name(metric); style = :note) + metadata!(val, "metric", metric; style = :note) + metadata!(val, "results", results; style = :note) + colmetadata!( + val, + findfirst(!=(DATETIME_COL), names(val)), + "metric", + metric; + style = :note, + ) +end + +# Helper function to call eval_fn and set the appropriate metadata +function _compute_component_timed_helper(metric::ComponentSelectorTimedMetric, + results::IS.Results, + comp::Union{Component, ComponentSelector}; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) + val = metric.eval_fn(results, comp, start_time, len) + _compute_meta_timed!(val, metric, results) + colmetadata!(val, 2, "components", [comp]; style = :note) + return val +end + +""" +Compute the given metric on the given component within the given set of results, returning a +`DataFrame` with a `DateTime` column and a data column labeled with the component's name. 
+ +# Arguments + - `metric::ComponentTimedMetric`: the metric to compute + - `results::IS.Results`: the results from which to fetch data + - `comp::Component`: the component on which to compute the metric + - `start_time::Union{Nothing, Dates.DateTime} = nothing`: the time at which the resulting + time series should begin + - `len::Union{Int, Nothing} = nothing`: the number of steps in the resulting time series +""" +compute(metric::ComponentTimedMetric, results::IS.Results, comp::Component; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) = + _compute_component_timed_helper(metric, results, comp; start_time, len) + +""" +Compute the given metric on the given component within the given set of results, returning a +`DataFrame` with a `DateTime` column and a data column labeled with the component's name. + +# Arguments + - `metric::CustomTimedMetric`: the metric to compute + - `results::IS.Results`: the results from which to fetch data + - `comp::Component`: the component on which to compute the metric + - `start_time::Union{Nothing, Dates.DateTime} = nothing`: the time at which the resulting + time series should begin + - `len::Union{Int, Nothing} = nothing`: the number of steps in the resulting time series +""" +compute(metric::CustomTimedMetric, results::IS.Results, + comp::Union{Component, ComponentSelector}; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) = + _compute_component_timed_helper(metric, results, comp; start_time, len) + +""" +Compute the given metric on the `System` associated with the given set of results, returning +a `DataFrame` with a `DateTime` column and a data column. 
+ +# Arguments + - `metric::SystemTimedMetric`: the metric to compute + - `results::IS.Results`: the results from which to fetch data + - `start_time::Union{Nothing, Dates.DateTime} = nothing`: the time at which the resulting + time series should begin + - `len::Union{Int, Nothing} = nothing`: the number of steps in the resulting time series +""" +function compute(metric::SystemTimedMetric, results::IS.Results; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) + val = metric.eval_fn(results, start_time, len) + _compute_meta_timed!(val, metric, results) + return val +end + +""" +Compute the given metric on the given set of results, returning a DataFrame with a single +cell. + +# Arguments + - `metric::ResultsTimelessMetric`: the metric to compute + - `results::IS.Results`: the results from which to fetch data +""" +function compute(metric::ResultsTimelessMetric, results::IS.Results) + val = DataFrame(RESULTS_COL => [metric.eval_fn(results)]) + _compute_meta_generic!(val, metric, results) + return val +end + +""" +Compute the given metric on the given set of results, returning a `DataFrame` with a single +cell; takes a `Nothing` where the `ComponentSelectorTimedMetric` method of this function would take a +`Component`/`ComponentSelector` for convenience +""" +compute(metric::ResultsTimelessMetric, results::IS.Results, selector::Nothing) = + compute(metric, results) + +""" +Compute the given metric on the `System` associated with the given set of results, returning +a `DataFrame` with a `DateTime` column and a data column; takes a `Nothing` where the +`ComponentSelectorTimedMetric` method of this function would take a `Component`/`ComponentSelector` for +convenience +""" +compute(metric::SystemTimedMetric, results::IS.Results, selector::Nothing; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) = compute(metric, results; + start_time = start_time, len = len) + +# TODO test 
allow_missing behavior
+function _extract_common_time(dfs::DataFrames.AbstractDataFrame...;
+    allow_missing = true, ex_fn::Function = time_vec)
+    time_cols = ex_fn.(dfs)
+    allow_missing || !any([any(ismissing.(Array(tc))) for tc in time_cols]) ||
+        throw(ErrorException("Missing time columns"))
+    # Candidate time column is the one with the most non-missing values
+    time_col = argmax(x -> count(!ismissing, Array(x)), time_cols)
+    # Other time columns must either be the same or [nothing]
+    # TODO come up with a more informative error here
+    all([
+        isequal(sub, time_col) ||
+        (all(ismissing.(Array(sub))) && size(sub, 1) == 1) for sub in time_cols
+    ]) ||
+        throw(ErrorException("Mismatched time columns"))
+    return time_col
+end
+
+# TODO test
+function _broadcast_time(data_cols, time_col; allow_unitary = true)
+    size(data_cols, 1) == size(time_col, 1) && return data_cols
+    (allow_unitary && size(data_cols, 1) == 1) ||
+        throw(ErrorException("Individual data column does not match aggregate time column"))
+    return repeat(data_cols, size(time_col, 1)) # Preserves metadata
+end
+
+# NOTE this really makes the case for a dedicated type for time-indexed dataframes, then
+# this would just be another hcat method to multiply dispatch
+"""
+If the time axes match across all the `DataFrames`, horizontally concatenate them and remove
+the duplicate time axes. If not, throw an error
+"""
+function hcat_timed(vals::DataFrame...) # TODO incorporate allow_missing
+    time_col = _extract_common_time(vals...; ex_fn = time_df)
+    broadcasted_vals = [data_df(sub) for sub in _broadcast_time.(vals, Ref(time_col))]
+    return hcat(time_col, broadcasted_vals...)
+end
+
+"""
+Compute the given `Metric` on the given `ComponentSelector` within the given set of results, aggregating
+across all the components in the `ComponentSelector` if necessary and returning a `DataFrame` with a
+`DateTime` column and a data column labeled with the `ComponentSelector`'s name.
+ +# Arguments + - `metric::ComponentTimedMetric`: the metric to compute + - `results::IS.Results`: the results from which to fetch data + - `selector::ComponentSelector`: the `ComponentSelector` on which to compute the metric + - `start_time::Union{Nothing, Dates.DateTime} = nothing`: the time at which the resulting + time series should begin + - `len::Union{Int, Nothing} = nothing`: the number of steps in the resulting time series +""" +function compute(metric::ComponentTimedMetric, results::IS.Results, + selector::ComponentSelector; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) + # TODO incorporate allow_missing + agg_fn = get_component_agg_fn(metric) + meta_agg_fn = get_component_meta_agg_fn(metric) + components = get_components(selector, PowerSimulations.get_system(results)) + vals = [ + compute(metric, results, com; start_time = start_time, len = len) for + com in components + ] + if length(vals) == 0 + if metric.eval_zero !== nothing + result = metric.eval_zero(results, start_time, len) + else + time_col = Vector{Union{Missing, Dates.DateTime}}([missing]) + data_col = agg_fn(Vector{Float64}()) + new_agg_meta = nothing + result = DataFrame(DATETIME_COL => time_col, get_name(selector) => data_col) + end + else + time_col = _extract_common_time(vals...) + data_vecs = [data_vec(sub) for sub in _broadcast_time.(vals, Ref(time_col))] + agg_metas = get_agg_meta.(vals) + is_agg_meta = !all(agg_metas .=== nothing) + data_col = is_agg_meta ? agg_fn(data_vecs, agg_metas) : agg_fn(data_vecs) + new_agg_meta = is_agg_meta ? 
meta_agg_fn(agg_metas) : nothing + result = DataFrame(DATETIME_COL => time_col, get_name(selector) => data_col) + (new_agg_meta === nothing) || set_agg_meta!(result, new_agg_meta) + end + + _compute_meta_timed!(result, metric, results) + colmetadata!(result, 2, "components", components; style = :note) + colmetadata!(result, 2, "ComponentSelector", selector; style = :note) + return result +end + +# TODO these are currently necessary because eval_fn is supposed to take start_time and len +# as positional arguments and compute as kwargs; would it be better to just switch one of +# those? +"""A version of `compute` that takes positional arguments for convenience""" +compute(met, res, ent, start_time, len) = + compute(met, res, ent; start_time = start_time, len = len) + +"""A version of `compute` that takes positional arguments for convenience""" +compute(met, res, start_time, len) = + compute(met, res; start_time = start_time, len = len) + +# TODO function compute_set +""" +Compute the given metric on the subselectors of the given `ComponentSelector` within the +given set of results, returning a `DataFrame` with a `DateTime` column and a data column for +each subselector. Should be the same as calling `compute` on each subselector and +concatenating. 
+ +# Arguments + - `metric::ComponentSelectorTimedMetric`: the metric to compute + - `results::IS.Results`: the results from which to fetch data + - `selector::ComponentSelector`: the `ComponentSelector` on whose subselectors to compute + the metric + - `start_time::Union{Nothing, Dates.DateTime} = nothing`: the time at which the resulting + time series should begin + - `len::Union{Int, Nothing} = nothing`: the number of steps in the resulting time series +""" +function compute_set(metric::ComponentSelectorTimedMetric, results::IS.Results, + selector::ComponentSelector; + start_time::Union{Nothing, Dates.DateTime} = nothing, + len::Union{Int, Nothing} = nothing) + subents = get_subselectors(selector, PowerSimulations.get_system(results)) + subcomputations = [compute(metric, results, sub; start_time, len) for sub in subents] + return hcat_timed(subcomputations...) +end + +# The core of compute_all, shared between the timed and timeless versions +function _common_compute_all(results, metrics, selectors, col_names; kwargs) + (selectors === nothing) && (selectors = fill(nothing, length(metrics))) + (selectors isa Vector) || (selectors = repeat([selectors], length(metrics))) + (col_names === nothing) && (col_names = fill(nothing, length(metrics))) + + length(selectors) == length(metrics) || throw( + ArgumentError("Got $(length(metrics)) metrics but $(length(selectors)) selectors")) + length(col_names) == length(metrics) || throw( + ArgumentError("Got $(length(metrics)) metrics but $(length(col_names)) names")) + + # For each triplet, do the computation, then rename the data column to the given name or + # construct our own name + return [ + let + computed = compute(metric, results, selector; kwargs...) + old_name = first(data_cols(computed)) + new_name = + (name === nothing) ? 
metric_selector_to_string(metric, selector) : name
+            DataFrames.rename(computed, old_name => new_name)
+        end
+        for (metric, selector, name) in zip(metrics, selectors, col_names)
+    ]
+end
+
+"""
+For each `(metric, selector, col_name)` tuple in `zip(metrics, selectors, col_names)`, call
+[`compute`](@ref) and collect the results in a `DataFrame` with a single `DateTime` column.
+
+# Arguments
+  - `results::IS.Results`: the results from which to fetch data
+  - `metrics::Vector{<:TimedMetric}`: the metrics to compute
+  - `selectors`: either a scalar or vector of `Nothing`/`Component`/`ComponentSelector`: the
+    selectors on which to compute the metrics, or nothing for system/results metrics;
+    broadcast if scalar
+  - `col_names::Union{Nothing, Vector{<:Union{Nothing, AbstractString}}} = nothing`: a vector
+    of names for the columns of output data. Entries of `nothing` default to the result of
+    [`metric_selector_to_string`](@ref); `names = nothing` is equivalent to an entire vector
+    of `nothing`
+  - `kwargs...`: pass through to each [`compute`](@ref) call
+"""
+compute_all(results::IS.Results,
+    metrics::Vector{<:TimedMetric},
+    selectors::Union{Nothing, Component, ComponentSelector, Vector} = nothing,
+    col_names::Union{Nothing, Vector{<:Union{Nothing, AbstractString}}} = nothing;
+    kwargs...,
+) = hcat_timed(_common_compute_all(results, metrics, selectors, col_names; kwargs)...)
+
+"""
+For each (metric, col_name) tuple in `zip(metrics, col_names)`, call [`compute`](@ref) and
+collect the results in a DataFrame.
+
+# Arguments
+  - `results::IS.Results`: the results from which to fetch data
+  - `metrics::Vector{<:TimelessMetric}`: the metrics to compute
+  - `selectors`: either a scalar or vector of `Nothing`/`Component`/`ComponentSelector`: the
+    selectors on which to compute the metrics, or nothing for system/results metrics;
+    broadcast if scalar
+  - `col_names::Union{Nothing, Vector{<:Union{Nothing, AbstractString}}} = nothing`: a vector
+    of names for the columns of output data. Entries of `nothing` default to the result of
+    [`metric_selector_to_string`](@ref); `names = nothing` is equivalent to an entire vector of
+    `nothing`
+  - `kwargs...`: pass through to each [`compute`](@ref) call
+"""
+compute_all(results::IS.Results, metrics::Vector{<:TimelessMetric},
+    selectors::Union{Nothing, Component, ComponentSelector, Vector} = nothing,
+    col_names::Union{Nothing, Vector{<:Union{Nothing, AbstractString}}} = nothing;
+    kwargs...,
+) = hcat(_common_compute_all(results, metrics, selectors, col_names; kwargs)...)
+
+ComputationTuple = Tuple{<:T, Any, Any} where {T <: Union{TimedMetric, TimelessMetric}}
+"""
+For each (metric, selector, col_name) tuple in `computations`, call [`compute`](@ref) and
+collect the results in a DataFrame with a single DateTime column.
+
+# Arguments
+  - `results::IS.Results`: the results from which to fetch data
+  - `computations::(Tuple{<:T, Any, Any} where T <: Union{TimedMetric, TimelessMetric})...`:
+    a list of the computations to perform, where each element is a (metric, selector,
+    col_name) where metric is the metric to compute, selector is the ComponentSelector on
+    which to compute the metric or nothing if not relevant, and col_name is the name for the
+    output column of data or nothing to use the default
+  - `kwargs...`: pass through to each [`compute`](@ref) call
+"""
+compute_all(results::IS.Results, computations::ComputationTuple...; kwargs...) =
+    compute_all(results, collect.(zip(computations...))...; kwargs...)
+ +# Sometimes, to construct new column names, we need to construct strings that don't appear +# as/in any other column names +function _make_unique_col_name(col_names; + allow_substring = false, initial_try = "newcol", suffix = "!") + col_name = initial_try + while allow_substring ? (col_name in col_names) : any(occursin.(col_name, col_names)) + col_name *= suffix + end + return col_name +end + +# Fetch the time_agg_fn associated with the particular column's Metric; error if no agg_fn can be determined +function _find_time_agg_fn(df, col_name, default_agg_fn) + my_agg_fn = default_agg_fn + col_md = colmetadata(df, col_name) + haskey(col_md, "metric") && (my_agg_fn = get_time_agg_fn(col_md["metric"])) + (my_agg_fn === nothing) && throw( + ArgumentError( + "No time aggregation function found for $col_name; specify in metric or use agg_fn kwarg $(col_md)", + ), + ) + return my_agg_fn +end + +# Construct a pipeline that can be passed to DataFrames.combine that represents the aggregation of the given column +function _construct_aggregation(df, agg_meta_colnames, col_name, default_agg_fn) + agg_fn = _find_time_agg_fn(df, col_name, default_agg_fn) + if haskey(agg_meta_colnames, col_name) + return [col_name, agg_meta_colnames[col_name]] => agg_fn => col_name + end + return col_name => agg_fn => col_name +end + +function _construct_meta_aggregation(df, col_name, meta_colname) + agg_fn = get_time_meta_agg_fn(colmetadata(df, col_name)["metric"]) + return meta_colname => agg_fn => meta_colname +end + +""" +Given a DataFrame like that produced by [`compute_all`](@ref), group by a function of the +time axis, apply a reduction, and report the resulting aggregation indexed by the first +timestamp in each group. + +# Arguments + - `df::DataFrames.AbstractDataFrame`: the DataFrame to operate upon + - `groupby_fn = nothing`: a callable that can be passed a DateTime; two rows will be in the + same group iff their timestamps produce the same result under `groupby_fn`. 
Note that + `groupby_fn = month` puts January 2023 and January 2024 into the same group whereas + `groupby_fn=(x -> (year(x), month(x)))` does not. + - `groupby_col::Union{Nothing, AbstractString, Symbol} = nothing`: specify a column name to + report the result of `groupby_fn` in the output DataFrame, or `nothing` to not + - `agg_fn = nothing`: by default, the aggregation function (`sum`/`mean`/etc.) is specified + by the Metric, which is read from the metadata of each column. If this metadata isn't + found, one can specify a default aggregation function like `sum` here; if nothing, an + error will be thrown. +""" +function aggregate_time( + df::DataFrames.AbstractDataFrame; + groupby_fn = nothing, + groupby_col::Union{Nothing, AbstractString, Symbol} = nothing, + agg_fn = nothing) + keep_groupby_col = (groupby_col !== nothing) + if groupby_fn === nothing && keep_groupby_col + throw(ArgumentError("Cannot keep the groupby column if not specifying groupby_fn")) + end + + # Everything goes into the same group by default + (groupby_fn === nothing) && (groupby_fn = (_ -> 0)) + + # Validate or create groupby column name + if keep_groupby_col + (groupby_col in names(df)) && + throw(ArgumentError("groupby_col cannot be an existing column name of df")) + else + groupby_col = _make_unique_col_name( + names(df); + allow_substring = true, + initial_try = "grouped", + ) + end + + # Find all aggregation metadata + # TODO should metadata columns be allowed to have aggregation metadata? Probably. 
+ agg_metas = Dict(varname => get_agg_meta(df, varname) for varname in data_cols(df)) + + # Create column names for non-nothing aggregation metadata + existing_cols = vcat(names(df), groupby_col) + agg_meta_colnames = Dict( + varname => + _make_unique_col_name(existing_cols; initial_try = varname * "_meta") + for varname in data_cols(df) if agg_metas[varname] !== nothing) + cols_with_agg_meta = collect(keys(agg_meta_colnames)) + + # TODO currently we can only handle Vector aggregation metadata (eventually we'll + # probably need two optional aggregation metadata fields, one for per-column data and + # one for per-element data) + @assert all(typeof.([agg_metas[cn] for cn in cols_with_agg_meta]) .<: Vector) + @assert all( + length(agg_metas[orig_name]) == length(df[!, orig_name]) + for orig_name in cols_with_agg_meta + ) + + # Add the groupby column and aggregation metadata columns + transformed = DataFrames.transform( + df, + DATETIME_COL => DataFrames.ByRow(groupby_fn) => groupby_col, + ) + for orig_name in cols_with_agg_meta + transformed[!, agg_meta_colnames[orig_name]] = agg_metas[orig_name] + end + + grouped = DataFrames.groupby(transformed, groupby_col) + # For all data columns and non-special metadata columns, find the agg_fn and handle aggregation metadata + aggregations = [ + _construct_aggregation(df, agg_meta_colnames, col_name, agg_fn) + for col_name in names(df) if !(col_name in (groupby_col, DATETIME_COL)) + ] + meta_aggregations = [ + _construct_meta_aggregation(df, orig_name, agg_meta_colnames[orig_name]) + for orig_name in cols_with_agg_meta + ] + # Take the first DateTime in each group, reduce the other columns as specified in aggregations, preserve column names + # TODO is it okay to always just take the first timestamp, or should there be a + # reduce_time_fn kwarg to, for instance, allow users to specify that they want the + # midpoint timestamp? 
+ combined = DataFrames.combine(grouped, + DATETIME_COL => first => DATETIME_COL, + aggregations..., meta_aggregations...) + + # Replace the aggregation metadata + for orig_name in cols_with_agg_meta + set_agg_meta!(combined, orig_name, combined[!, agg_meta_colnames[orig_name]]) + end + + # Drop agg_meta columns, reorder the columns for convention + not_index = DataFrames.Not(groupby_col, DATETIME_COL, values(agg_meta_colnames)...) + result = DataFrames.select(combined, DATETIME_COL, groupby_col, not_index) + + set_col_meta!(result, DATETIME_COL) + set_col_meta!(result, groupby_col) + keep_groupby_col || (result = DataFrames.select(result, DataFrames.Not(groupby_col))) + return result +end + +function _common_compose_metrics(res, ent, reduce_fn, metrics, output_col_name; kwargs...) + col_names = string.(range(1, length(metrics))) + sub_results = compute_all(res, collect(metrics), ent, col_names; kwargs...) + result = DataFrames.transform(sub_results, col_names => reduce_fn => output_col_name) + (DATETIME_COL in names(result)) && return result[!, [DATETIME_COL, output_col_name]] + return first(result[!, output_col_name]) # eval_fn of timeless metrics returns scalar +end + +""" +Given a list of metrics and a function that applies to their results to produce one result, +create a new metric that computes the sub-metrics and applies the function to produce its +own result. + +# Arguments + - `name::String`: the name of the new `Metric` + - `description::String`: the description of the new `Metric` + - `reduce_fn`: a callable that takes one value from each of the input `Metric`s and returns + a single value that will be the result of this `Metric`. "Value" means a vector (not a + `DataFrame`) in the case of `TimedMetrics` and a scalar for `TimelessMetrics`. + - `metrics`: the input `Metrics`. It is currently not possible to combine `TimedMetrics` + with `TimelessMetrics`, though it is possible to combine `ComponentSelectorTimedMetrics` + with `SystemTimedMetrics`. 
+""" +function compose_metrics end # For the unified docstring + +compose_metrics( + name::String, + description::String, + reduce_fn, + metrics::ComponentSelectorTimedMetric..., +) = CustomTimedMetric(name, description, + (res::IS.Results, ent::Union{Component, ComponentSelector}, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> + _common_compose_metrics( + res, + ent, + reduce_fn, + metrics, + get_name(ent); + start_time = start_time, + len = len, + ), +) + +compose_metrics( + name::String, + description::String, + reduce_fn, + metrics::SystemTimedMetric...) = SystemTimedMetric(name, description, + ( + res::IS.Results, + start_time::Union{Nothing, Dates.DateTime}, + len::Union{Int, Nothing}, + ) -> + _common_compose_metrics( + res, + nothing, + reduce_fn, + metrics, + SYSTEM_COL; + start_time = start_time, + len = len, + ), +) + +compose_metrics( + name::String, + description::String, + reduce_fn, + metrics::ResultsTimelessMetric...) = ResultsTimelessMetric(name, description, + res::IS.Results -> + _common_compose_metrics( + res, + nothing, + reduce_fn, + metrics, + RESULTS_COL, + ), +) + +# Create a ComponentSelectorTimedMetric that wraps a SystemTimedMetric, disregarding the ComponentSelector +component_selector_metric_from_system_metric(in_metric::SystemTimedMetric) = + CustomTimedMetric( + get_name(in_metric), + get_description(in_metric), + (res::IS.Results, comp::Union{Component, ComponentSelector}, + start_time::Union{Nothing, Dates.DateTime}, len::Union{Int, Nothing}) -> + compute(in_metric, res, start_time, len)) + +# This one only gets triggered when we have at least one ComponentSelectorTimedMetric *and* +# at least one SystemTimedMetric, in which case the behavior is to treat the +# SystemTimedMetrics as if they applied to the selector +function compose_metrics( + name::String, + description::String, + reduce_fn, + metrics::Union{ComponentSelectorTimedMetric, SystemTimedMetric}...) 
+ wrapped_metrics = [ + (m isa SystemTimedMetric) ? component_selector_metric_from_system_metric(m) : m + for + m in metrics + ] + return compose_metrics(name, description, reduce_fn, wrapped_metrics...) +end diff --git a/test/runtests.jl b/test/runtests.jl index 3238570..cf58d41 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,41 +1,4 @@ -using Test -using TestSetExtensions -using Logging -using Dates -using DataFrames -using DataStructures -import InfrastructureSystems -import InfrastructureSystems: Deterministic, Probabilistic, Scenarios, Forecast -using PowerSystems -using PowerAnalytics -using PowerSimulations -using GLPK -using TimeSeries -using StorageSystemsSimulations -using HydroPowerSimulations - -const PA = PowerAnalytics -const IS = InfrastructureSystems -const PSY = PowerSystems -const PSI = PowerSimulations -const LOG_FILE = "PowerAnalytics-test.log" - -const BASE_DIR = dirname(dirname(pathof(PowerAnalytics))) -const TEST_DIR = joinpath(BASE_DIR, "test") -const TEST_OUTPUTS = joinpath(BASE_DIR, "test", "test_results") -!isdir(TEST_OUTPUTS) && mkdir(TEST_OUTPUTS) -const TEST_RESULT_DIR = joinpath(TEST_OUTPUTS, "results") -!isdir(TEST_RESULT_DIR) && mkdir(TEST_RESULT_DIR) - -import PowerSystemCaseBuilder -const PSB = PowerSystemCaseBuilder - -LOG_LEVELS = Dict( - "Debug" => Logging.Debug, - "Info" => Logging.Info, - "Warn" => Logging.Warn, - "Error" => Logging.Error, -) +include("setuptests.jl") macro includetests(testarg...) 
if length(testarg) == 0 @@ -79,7 +42,6 @@ function get_logging_level(env_name::String, default) end function run_tests() - include(joinpath(BASE_DIR, "test", "test_data", "results_data.jl")) console_level = get_logging_level("PS_CONSOLE_LOG_LEVEL", "Error") console_logger = ConsoleLogger(stderr, console_level) file_level = get_logging_level("PS_LOG_LEVEL", "Info") diff --git a/test/setuptests.jl b/test/setuptests.jl new file mode 100644 index 0000000..56e780f --- /dev/null +++ b/test/setuptests.jl @@ -0,0 +1,42 @@ +using Test +using TestSetExtensions +using Logging +using Dates +using DataFrames +using DataStructures +import InfrastructureSystems +import InfrastructureSystems: Deterministic, Probabilistic, Scenarios, Forecast +using PowerSystems +using PowerAnalytics +using PowerSimulations +using GLPK +using TimeSeries +using StorageSystemsSimulations +using HydroPowerSimulations + +const PA = PowerAnalytics +const IS = InfrastructureSystems +const PSY = PowerSystems +const PSI = PowerSimulations +const LOG_FILE = "PowerAnalytics-test.log" + +const BASE_DIR = dirname(dirname(pathof(PowerAnalytics))) +const TEST_DIR = joinpath(BASE_DIR, "test") +const TEST_OUTPUTS = joinpath(BASE_DIR, "test", "test_results") +!isdir(TEST_OUTPUTS) && mkdir(TEST_OUTPUTS) +const TEST_RESULT_DIR = joinpath(TEST_OUTPUTS, "results") +!isdir(TEST_RESULT_DIR) && mkdir(TEST_RESULT_DIR) +const TEST_SIM_NAME = "results_sim" +const TEST_DUPLICATE_RESULTS_NAME = "temp_duplicate_results" + +import PowerSystemCaseBuilder +const PSB = PowerSystemCaseBuilder + +LOG_LEVELS = Dict( + "Debug" => Logging.Debug, + "Info" => Logging.Info, + "Warn" => Logging.Warn, + "Error" => Logging.Error, +) + +include(joinpath(BASE_DIR, "test", "test_data", "results_data.jl")) diff --git a/test/test_builtin_component_selectors.jl b/test/test_builtin_component_selectors.jl new file mode 100644 index 0000000..5a9e0f0 --- /dev/null +++ b/test/test_builtin_component_selectors.jl @@ -0,0 +1,27 @@ +test_sys = 
PSB.build_system(PSB.PSITestSystems, "c_sys5_all_components") +test_sys2 = PSB.build_system(PSB.PSITestSystems, "c_sys5_bat") +name_and_type = component -> (typeof(component), get_name(component)) + +@testset "Test load_component_selector and storage_component_selector" begin + @test Set(name_and_type.(get_components(load_component_selector, test_sys))) == + Set([(PowerLoad, "Bus2"), (PowerLoad, "Bus4"), (StandardLoad, "Bus3")]) + @test Set(name_and_type.(get_components(storage_component_selector, test_sys2))) == + Set([(EnergyReservoirStorage, "Bat")]) +end + +@testset "Test generator_selectors_by_fuel" begin + @test isfile(PA.FUEL_TYPES_DATA_FILE) + @test Set(keys(generator_selectors_by_fuel)) == + Set(["Biopower", "CSP", "Coal", "Geothermal", "Hydropower", "NG-CC", "NG-CT", + "NG-Steam", "Natural gas", "Nuclear", "Other", "PV", "Petroleum", "Storage", + "Wind"]) + @test Set( + name_and_type.(get_components(generator_selectors_by_fuel["Wind"], test_sys)), + ) == Set([(RenewableDispatch, "WindBusB"), (RenewableDispatch, "WindBusC"), + (RenewableDispatch, "WindBusA")]) + @test Set( + name_and_type.(get_components(generator_selectors_by_fuel["Coal"], test_sys)), + ) == Set([(ThermalStandard, "Park City"), (ThermalStandard, "Sundance"), + (ThermalStandard, "Alta"), (ThermalStandard, "Solitude"), + (ThermalStandard, "Brighton")]) +end diff --git a/test/test_data/results_data.jl b/test/test_data/results_data.jl index d4c0245..acb2a8a 100644 --- a/test/test_data/results_data.jl +++ b/test/test_data/results_data.jl @@ -1,5 +1,5 @@ function add_re!(sys) - re = RenewableDispatch( + re1 = RenewableDispatch( "WindBusA", true, get_component(ACBus, sys, "bus5"), @@ -9,11 +9,27 @@ function add_re!(sys) PrimeMovers.WT, (min = 0.0, max = 0.0), 1.0, - TwoPartCost(0.220, 0.0), - 100.0, + TwoPartCost(LinearFunctionData(0.220), 0.0), + 10.0, + ) + add_component!(sys, re1) + copy_time_series!(re1, get_component(PowerLoad, sys, "bus2")) + + re2 = RenewableDispatch( + "SolarBusC", + 
true, + get_component(ACBus, sys, "bus1"), + 0.0, + 0.0, + 1.200, + PrimeMovers.PVe, + (min = 0.0, max = 0.0), + 1.0, + TwoPartCost(LinearFunctionData(0.220), 0.0), + 2.0, ) - add_component!(sys, re) - copy_time_series!(re, get_component(PowerLoad, sys, "bus2")) + add_component!(sys, re2) + copy_time_series!(re2, get_component(PowerLoad, sys, "bus3")) fx = RenewableFix( "RoofTopSolar", @@ -61,9 +77,8 @@ function add_re!(sys) add_component!(sys, batt) end -function run_test_sim(result_dir::String) +function run_test_sim(result_dir::String, sim_name::String) mkpath(result_dir) - sim_name = "results_sim" sim_path = joinpath(result_dir, sim_name) results = _try_load_simulation_results(sim_path) diff --git a/test/test_input.jl b/test/test_input.jl new file mode 100644 index 0000000..bf9ceaa --- /dev/null +++ b/test/test_input.jl @@ -0,0 +1,66 @@ +stock_decision_results_sets = run_test_sim(TEST_RESULT_DIR, TEST_SIM_NAME) +stock_results_prob = run_test_prob() + +sim_results = SimulationResults(TEST_RESULT_DIR, TEST_SIM_NAME) +decision_problem_names = ("UC", "ED") +my_results_sets = get_decision_problem_results.(Ref(sim_results), decision_problem_names) + +# TODO is there a better way? +function test_system_equivalence(sys1::System, sys2::System) + @test ==(((sys1, sys2) .|> IS.get_internal .|> IS.get_uuid)...) 
+ @test sort(get_name.(get_components(Component, sys1))) == + sort(get_name.(get_components(Component, sys2))) +end + +@testset "Test results function" begin + for (my_results, stock_results) in + zip(my_results_sets, stock_decision_results_sets) + test_system_equivalence( + get_system!(my_results), + get_system(stock_results), + ) + end +end + +# Reimplements Base.Filesystem.cptree since that isn't exported +function cptree(src::String, dst::String) + mkdir(dst) + for name in readdir(src) + srcname = joinpath(src, name) + if isdir(srcname) + cptree(srcname, joinpath(dst, name)) + else + cp(srcname, joinpath(dst, name)) + end + end +end + +# Create another results directory +function setup_duplicate_results() + teardown_duplicate_results() + cptree( + joinpath(TEST_RESULT_DIR, TEST_SIM_NAME), + joinpath(TEST_RESULT_DIR, TEST_DUPLICATE_RESULTS_NAME), + ) +end + +function teardown_duplicate_results() + rm(joinpath(TEST_RESULT_DIR, TEST_DUPLICATE_RESULTS_NAME); + force = true, recursive = true) +end + +@testset "Test scenario-level functions" begin + setup_duplicate_results() + for (problem, stock_results) in zip(decision_problem_names, stock_decision_results_sets) + scenario_names = [TEST_SIM_NAME, TEST_DUPLICATE_RESULTS_NAME] + scenarios = create_problem_results_dict(TEST_RESULT_DIR, problem) + @test Set(keys(scenarios)) == Set(scenario_names) + scenarios = create_problem_results_dict(TEST_RESULT_DIR, problem, scenario_names) + @test Set(keys(scenarios)) == Set(scenario_names) + test_system_equivalence( + get_system!(scenarios[TEST_SIM_NAME]), + get_system(stock_results), + ) + end + teardown_duplicate_results() +end diff --git a/test/test_metrics.jl b/test/test_metrics.jl new file mode 100644 index 0000000..9c0030e --- /dev/null +++ b/test/test_metrics.jl @@ -0,0 +1,670 @@ +# LOAD RESULTS +(results_uc, results_ed) = run_test_sim(TEST_RESULT_DIR, TEST_SIM_NAME) +results_prob = run_test_prob() +resultses = Dict("UC" => results_uc, "ED" => results_ed, "prob" => 
results_prob) +@assert all( + in.("ActivePowerVariable__ThermalStandard", list_variable_names.(values(resultses))), +) "Expected all results to contain ActivePowerVariable__ThermalStandard" +comp_results = Dict() # Will be populated later + +# CONSTRUCT COMMON TEST RESOURCES +test_calc_active_power = ComponentTimedMetric( + "ActivePower", + "Calculate the active power output of the specified `ComponentSelector`", + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, + len::Union{Int, Nothing}) -> let + key = PSI.VariableKey(ActivePowerVariable, typeof(comp)) + res = PSI.read_results_with_keys(res, [key]; start_time = start_time, len = len) + first(values(res))[!, [DATETIME_COL, get_name(comp)]] + end, +) + +test_calc_production_cost = ComponentTimedMetric( + "ProductionCost", + "Calculate the production cost of the specified `ComponentSelector`", + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, + len::Union{Int, Nothing}) -> let + key = PSI.ExpressionKey(ProductionCostExpression, typeof(comp)) + res = PSI.read_results_with_keys(res, [key]; start_time = start_time, len = len) + first(values(res))[!, [DATETIME_COL, get_name(comp)]] + end, +) + +test_calc_system_slack_up = SystemTimedMetric( + "SystemSlackUp", + "Calculate the system balance slack up", + (res::IS.Results, + start_time::Union{Nothing, Dates.DateTime}, + len::Union{Int, Nothing}) -> let + key = PSI.VariableKey(SystemBalanceSlackUp, System) + res = PSI.read_results_with_keys(res, [key]; start_time = start_time, len = len) + res = first(values(res)) + # If there's more than a datetime column and a data column, we are misunderstanding + @assert size(res, 2) == 2 + return DataFrames.rename( + res, + findfirst(!=(DATETIME_COL), names(res)) => SYSTEM_COL, + ) + end, +) + +test_calc_sum_objective_value = ResultsTimelessMetric( + "SumObjectiveValue", + "Sum the objective values achieved in the optimization problems", + (res::IS.Results) -> 
sum(PSI.read_optimizer_stats(res)[!, "objective_value"]), +) + +test_calc_sum_solve_time = ResultsTimelessMetric( + "SumSolveTime", + "Sum the solve times taken by the optimization problems", + (res::IS.Results) -> sum(PSI.read_optimizer_stats(res)[!, "solve_time"]), +) + +thermal_vals = [1, 2, 3] +thermal_weights = [1, 1, 3] +other_vals = [4, 5, 6] +other_weights = [2, 1, 1] +test_calc_dummy_meta = ComponentTimedMetric( + "DummyMeta", + "Return some simple numbers with some simple metadata", + (res::IS.Results, comp::Component, + start_time::Union{Nothing, Dates.DateTime}, + len::Union{Int, Nothing}) -> let + (start_time !== nothing && len !== nothing) && + error("Not implemented for non-nothing `start_time`, `len`") + dates = collect(DateTime(2023, 1, 1):Hour(8):DateTime(2023, 1, 1, 16)) + values = (typeof(comp) == ThermalStandard) ? thermal_vals : other_vals + agg_meta = (typeof(comp) == ThermalStandard) ? thermal_weights : other_weights + result = DataFrame(DATETIME_COL => dates, get_name(comp) => values) + set_agg_meta!(result, agg_meta) + end; + component_agg_fn = weighted_mean, + time_agg_fn = weighted_mean, +) + +my_dates = [DateTime(2023), DateTime(2024)] +my_data1 = [3.14, 2.71] +my_data2 = [1.61, 1.41] +my_meta = [1, 2] + +my_df1 = DataFrame(DATETIME_COL => my_dates, "MyComponent" => my_data1) +colmetadata!(my_df1, DATETIME_COL, META_COL_KEY, true; style = :note) + +my_df2 = DataFrame( + DATETIME_COL => my_dates, + "Component1" => my_data1, + "Component2" => my_data2, + "MyMeta" => my_meta, +) +colmetadata!(my_df2, DATETIME_COL, META_COL_KEY, true; style = :note) +colmetadata!(my_df2, "MyMeta", META_COL_KEY, true; style = :note) + +missing_df = DataFrame( + DATETIME_COL => Vector{Union{Missing, Dates.DateTime}}([missing]), + "MissingComponent" => 0.0) +colmetadata!(missing_df, DATETIME_COL, META_COL_KEY, true; style = :note) + +my_df_agg_meta = copy(my_df2) +my_agg_meta = [[1, 2], [3, 4]] +colmetadata!(my_df_agg_meta, "Component1", AGG_META_KEY, 
my_agg_meta[1]; style = :note) +colmetadata!(my_df_agg_meta, "Component2", AGG_META_KEY, my_agg_meta[2]; style = :note) + +my_dates_long = collect(DateTime(2023, 1, 1):Hour(8):DateTime(2023, 3, 31, 16)) +my_data_long_1 = collect(range(0, 100, length(my_dates_long))) +my_data_long_2 = collect(range(24, 0, length(my_dates_long))) .+ 0.5 / length(my_dates_long) +my_meta_long = (my_dates_long .|> day) .% 2 +my_df3 = DataFrame( + DATETIME_COL => my_dates_long, + "Component1" => my_data_long_1, + "Component2" => my_data_long_2, + "MyMeta" => my_meta_long, +) + +wind_ent = select_components(RenewableDispatch, "WindBusA") +solar_ent = select_components(RenewableDispatch, "SolarBusC") +thermal_ent = select_components(ThermalStandard, "Brighton") +test_selectors = [wind_ent, solar_ent, thermal_ent] + +# HELPER FUNCTIONS +function test_timed_metric_helper(computed_alltime, met, data_colname) + test_generic_metric_helper(computed_alltime, met, data_colname) + @test names(computed_alltime) == [DATETIME_COL, data_colname] + @test eltype(computed_alltime[!, DATETIME_COL]) <: Union{Missing, DateTime} + + # Row tests, all time + # TODO check that the number of rows is correct? +end + +function test_generic_metric_helper(computed, met, data_colname) + @test get(metadata(computed), "title", nothing) == met.name + @test get(metadata(computed), "metric", nothing) === met + @test get(colmetadata(computed, data_colname), "metric", nothing) == + get(metadata(computed), "metric", nothing) + @test eltype(computed[!, data_colname]) <: Number +end + +function test_component_timed_metric(met, res, ent) + computed_alltime = compute(met, res, ent) + test_timed_metric_helper(computed_alltime, met, get_name(ent)) + + the_components = + (ent isa Component) ? 
[ent] :
+        collect(get_components(ent, get_system(res)))
+    @test all(
+        get(colmetadata(computed_alltime, get_name(ent)), "components", nothing) .==
+        the_components,
+    )
+    (ent isa ComponentSelector) &&
+        @test get(
+            colmetadata(computed_alltime, get_name(ent)),
+            "ComponentSelector",
+            nothing,
+        ) == ent
+
+    # Row tests, specified time. Skip for test on empty selector, there is no time axis to base computed_sometime off in this case
+    if length(the_components) > 0
+        test_start_time = computed_alltime[2, DATETIME_COL]
+        test_len = 3
+        computed_sometime = compute(met, res, ent;
+            start_time = test_start_time, len = test_len)
+        @test computed_sometime[1, DATETIME_COL] == test_start_time
+        @test size(computed_sometime, 1) == test_len
+    else
+        computed_sometime = nothing
+    end
+
+    return computed_alltime, computed_sometime
+end
+
+function test_system_timed_metric(met, res)
+    computed_alltime = compute(met, res)
+    test_timed_metric_helper(computed_alltime, met, SYSTEM_COL)
+    @test compute(met, res, nothing) == computed_alltime
+
+    # Row tests, specified time
+    test_start_time = computed_alltime[2, DATETIME_COL]
+    test_len = 3
+    computed_sometime = compute(met, res; start_time = test_start_time, len = test_len)
+    @test computed_sometime[1, DATETIME_COL] == test_start_time
+    @test size(computed_sometime, 1) == test_len
+
+    return computed_alltime, computed_sometime
+end
+
+function test_results_timeless_metric(met, res)
+    computed = compute(met, res)
+    test_generic_metric_helper(computed, met, RESULTS_COL)
+    @test compute(met, res, nothing) == computed
+    return computed
+end
+
+function test_df_approx_equal(lhs, rhs)
+    @test all(names(lhs) .== names(rhs))
+    for (lhs_col, rhs_col) in zip(eachcol(lhs), eachcol(rhs))
+        if eltype(lhs_col) <: AbstractFloat || eltype(rhs_col) <: AbstractFloat
+            @test all(isapprox.(lhs_col, rhs_col))
+        else
+            @test all(lhs_col .== rhs_col)
+        end
+    end
+end
+
+# BEGIN TEST SETS
+@testset "Test metrics helper functions" begin
+    @test 
metric_selector_to_string( + test_calc_active_power, + select_components(ThermalStandard), + ) == + "ActivePower__ThermalStandard" + + @test is_col_meta(my_df2, DATETIME_COL) + @test !is_col_meta(my_df2, "Component1") + @test is_col_meta(my_df2, "MyMeta") + + my_df1_copy = copy(my_df1) + @test !is_col_meta(my_df1_copy, "MyComponent") + set_col_meta!(my_df1_copy, "MyComponent") + @test is_col_meta(my_df1_copy, "MyComponent") + set_col_meta!(my_df1_copy, "MyComponent", false) + @test !is_col_meta(my_df1_copy, "MyComponent") + + @test time_df(my_df1) == DataFrame(DATETIME_COL => copy(my_dates)) + @test time_vec(my_df1) == copy(my_dates) + @test data_df(my_df1) == DataFrame(; MyComponent = copy(my_data1)) + @test data_vec(my_df1) == copy(my_data1) + @test data_mat(my_df1) == copy(my_data1)[:, :] + + @test data_cols(my_df2) == ["Component1", "Component2"] + @test time_df(my_df2) == DataFrame(DATETIME_COL => copy(my_dates)) + @test time_vec(my_df2) == copy(my_dates) + @test data_df(my_df2) == + DataFrame("Component1" => copy(my_data1), "Component2" => copy(my_data2)) + @test_throws ArgumentError data_vec(my_df2) + @test data_mat(my_df2) == hcat(copy(my_data1), copy(my_data2)) + + @test hcat_timed(my_df1, DataFrames.rename(my_df1, "MyComponent" => "YourComponent")) == + DataFrame( + DATETIME_COL => my_dates, + "MyComponent" => my_data1, + "YourComponent" => my_data1, + ) +end + +@testset "Test aggregate_time" begin + test_df_approx_equal( + aggregate_time(my_df3; agg_fn = sum), + DataFrame( + DATETIME_COL => first(my_dates_long), + "Component1" => sum(my_data_long_1), + "Component2" => sum(my_data_long_2), + "MyMeta" => sum(my_meta_long), + ), + ) + @test is_col_meta(aggregate_time(my_df3; agg_fn = sum), DATETIME_COL) + + month_agg = + aggregate_time(my_df3; groupby_fn = dt -> (year(dt), month(dt)), agg_fn = sum) + @test size(month_agg, 1) == 3 + test_df_approx_equal( + month_agg[1:1, :], + DataFrame( + DATETIME_COL => first(my_dates_long), + "Component1" => 
sum(my_data_long_1[1:(31 * 3)]), + "Component2" => sum(my_data_long_2[1:(31 * 3)]), + "MyMeta" => sum(my_meta_long[1:(31 * 3)]), + ), + ) + + day_agg = aggregate_time(my_df3; groupby_fn = Date, agg_fn = sum) + @test size(day_agg, 1) == 31 + 28 + 31 + test_df_approx_equal( + day_agg[1:1, :], + DataFrame( + DATETIME_COL => first(my_dates_long), + "Component1" => sum(my_data_long_1[1:3]), + "Component2" => sum(my_data_long_2[1:3]), + "MyMeta" => sum(my_meta_long[1:3]), + ), + ) + + hour_agg = aggregate_time(my_df3; groupby_fn = hour, agg_fn = sum) + @test size(hour_agg, 1) == 3 + test_df_approx_equal( + hour_agg[1:1, :], + DataFrame( + DATETIME_COL => first(my_dates_long), + "Component1" => sum(my_data_long_1[1:3:end]), + "Component2" => sum(my_data_long_2[1:3:end]), + "MyMeta" => sum(my_meta_long[1:3:end]), + ), + ) + + day_agg_2 = aggregate_time(my_df3; groupby_fn = Date, groupby_col = "day", agg_fn = sum) + @test "day" in names(day_agg_2) + @test is_col_meta(day_agg_2, "day") + @test day_agg_2[!, "day"] == Date.(time_vec(day_agg_2)) +end + +@testset "Test ComponentTimedMetric on Components" begin + for (label, res) in pairs(resultses) + comps1 = collect(get_components(RenewableDispatch, get_system(res))) + comps2 = collect(get_components(ThermalStandard, get_system(res))) + for comp in vcat(comps1, comps2) + # This is a lot of testing, but we need all these in the dictionary anyway for + # later, so we might as well test with all of them + comp_results[(label, get_name(comp))] = + test_component_timed_metric(test_calc_active_power, res, comp) + end + end +end + +@testset "Test ComponentTimedMetric on ComponentSelectorElements" begin + for (label, res) in pairs(resultses) + for ent in test_selectors + computed_alltime, computed_sometime = + test_component_timed_metric(test_calc_active_power, res, ent) + + # ComponentSelectorElement results should be the same as Component results + component_name = get_name(first(get_components(ent, get_system(res)))) + 
base_computed_alltime, base_computed_sometime = + comp_results[(label, component_name)] + @test time_df(computed_alltime) == time_df(base_computed_alltime) + # Using data_vec because the column names are allowed to differ + @test data_vec(computed_alltime) == data_vec(base_computed_alltime) + @test time_df(computed_sometime) == time_df(base_computed_sometime) + @test data_vec(computed_sometime) == data_vec(base_computed_sometime) + end + end +end + +@testset "Test ComponentTimedMetric on ComponentSelectorSets" begin + test_selector_sets = [ + select_components(wind_ent, solar_ent), + select_components(test_selectors...), + select_components(ThermalStandard), + select_components(), + ] + + for (label, res) in pairs(resultses) + for ent in test_selector_sets + my_test_metric = test_calc_active_power + computed_alltime, computed_sometime = + test_component_timed_metric(my_test_metric, res, ent) + + component_names = get_name.(get_components(ent, get_system(res))) + if length(component_names) == 0 + @test isequal(time_vec(computed_alltime), + Vector{Union{Missing, Dates.DateTime}}([missing])) + @test data_vec(computed_alltime) == + [get_component_agg_fn(my_test_metric)(Vector{Float64}())] + else + (base_computed_alltimes, base_computed_sometimes) = + zip([comp_results[(label, cn)] for cn in component_names]...) 
+ @test time_df(computed_alltime) == time_df(first(base_computed_alltimes)) + @test data_vec(computed_alltime) == + sum([data_vec(sub) for sub in base_computed_alltimes]) + @test time_df(computed_sometime) == + time_df(first(base_computed_sometimes)) + @test data_vec(computed_sometime) == + sum([data_vec(sub) for sub in base_computed_sometimes]) + end + end + end +end + +@testset "Test SystemTimedMetric" begin + # The relevant data only exists in the ED results + test_system_timed_metric(test_calc_system_slack_up, results_ed) +end + +@testset "Test ResultsTimelessMetric" begin + for (label, res) in pairs(resultses) + test_results_timeless_metric(test_calc_sum_objective_value, res) + end +end + +@testset "Test compute_set" begin + combo_selector = select_components(test_selectors...) + mymet = test_calc_active_power + for (label, res) in pairs(resultses) + computed_alltime = compute_set(mymet, res, combo_selector) + cols = data_cols(computed_alltime) + ents = colmetadata.(Ref(computed_alltime), cols, "ComponentSelector") + @test all(ents .== test_selectors) # One column for each subselector in the input + @test all(cols .== get_name.(ents)) # Named properly + + # TODO bit of code duplication between here and test_component_timed_metric + test_start_time = computed_alltime[2, DATETIME_COL] + test_len = 3 + computed_sometime = compute_set(mymet, res, combo_selector; + start_time = test_start_time, len = test_len) + @test computed_sometime[1, DATETIME_COL] == test_start_time + @test size(computed_sometime, 1) == test_len + + # DateTime plus data column slices of compute_set() should be identical to results of compute() + for (col_name, this_selector) in zip(cols, ents) + test_timed_metric_helper( + computed_alltime[!, [DATETIME_COL, col_name]], + mymet, + col_name, + ) + this_components = collect(get_components(this_selector, get_system(res))) + @test all( + get(colmetadata(computed_alltime, col_name), "components", nothing) .== + this_components, + ) + + 
base_computed_alltime, base_computed_sometime = + comp_results[(label, get_name(first(this_components)))] + @test time_df(computed_alltime) == time_df(base_computed_alltime) + @test computed_alltime[!, col_name] == data_vec(base_computed_alltime) + @test time_df(computed_sometime) == time_df(base_computed_sometime) + @test computed_sometime[!, col_name] == data_vec(base_computed_sometime) + end + end +end + +@testset "Test compute_all" begin + my_metrics = [test_calc_active_power, test_calc_active_power, + test_calc_production_cost, test_calc_production_cost] + my_component = first(get_components(RenewableDispatch, get_system(results_uc))) + my_selectors = + [select_components(ThermalStandard), select_components(RenewableDispatch), + select_components(ThermalStandard), my_component] + all_result = compute_all(results_uc, my_metrics, my_selectors) + + for (metric, selector) in zip(my_metrics, my_selectors) + one_result = compute(metric, results_uc, selector) + @test time_df(all_result) == time_df(one_result) + @test all_result[!, metric_selector_to_string(metric, selector)] == + data_vec(one_result) + @test get(metadata(all_result), "results", nothing) == + get(metadata(one_result), "results", nothing) + # Comparing the components iterators with == gives false failures + # TODO why do we need collect here but not in test_component_timed_metric? 
+ @test all( + collect( + colmetadata( + all_result, + metric_selector_to_string(metric, selector), + "components", + ), + ) .== collect(colmetadata(one_result, 2, "components")), + ) + @test colmetadata( + all_result, + metric_selector_to_string(metric, selector), + "metric", + ) == + colmetadata(one_result, 2, "metric") + (selector isa Component) || @test colmetadata( + all_result, + metric_selector_to_string(metric, selector), + "ComponentSelector", + ) == colmetadata(one_result, 2, "ComponentSelector") + end + + my_names = ["Thermal Power", "Renewable Power", "Thermal Cost", "Renewable Cost"] + all_result_named = compute_all(results_uc, my_metrics, my_selectors, my_names) + @test names(all_result_named) == vcat(DATETIME_COL, my_names...) + @test time_df(all_result_named) == time_df(all_result) + @test data_mat(all_result_named) == data_mat(all_result) + + @test_throws ArgumentError compute_all(results_uc, my_metrics, my_selectors[2:end]) + @test_throws ArgumentError compute_all(results_uc, my_metrics, my_selectors, + my_names[2:end]) + + for (label, res) in pairs(resultses) + @test compute_all(res, [test_calc_sum_objective_value, test_calc_sum_solve_time], + nothing, ["Met1", "Met2"]) == DataFrame( + "Met1" => first(data_mat(compute(test_calc_sum_objective_value, res))), + "Met2" => first(data_mat(compute(test_calc_sum_solve_time, res)))) + end + + broadcasted_compute_all = compute_all( + results_uc, + [test_calc_active_power, test_calc_active_power], + select_components(ThermalStandard), + ["discard", "ThermalStandard"], + ) + @test broadcasted_compute_all[!, [DATETIME_COL, "ThermalStandard"]] == + compute(test_calc_active_power, results_uc, select_components(ThermalStandard)) + + @test compute_all(results_uc, my_metrics, my_selectors, my_names) == + compute_all(results_uc, collect(zip(my_metrics, my_selectors, my_names))...) 
+ @test compute_all( + results_uc, + [test_calc_sum_objective_value, test_calc_sum_solve_time], + nothing, + ["obje", "solv"], + ) == compute_all( + results_uc, + (test_calc_sum_objective_value, nothing, "obje"), + (test_calc_sum_solve_time, nothing, "solv"), + ) + + @test_throws MethodError compute_all( # Can't mix TimedMetrics and TimelessMetrics + results_uc, + [(test_calc_active_power, select_components(ThermalStandard), "therm"), + (test_calc_sum_objective_value, nothing, "obje")], + ) +end + +@testset "Test compose_metrics" begin + myent = select_components(ThermalStandard) + mymet1 = compose_metrics( + "ThriceActivePower", + "Computes ActivePower*3", + (+), + test_calc_active_power, + test_calc_active_power, + test_calc_active_power, + ) + results1 = compute_all( + results_uc, + [test_calc_active_power, mymet1], + [myent, myent], + ["once", "thrice"], + ) + @test all(results1[!, "once"] * 3 .== results1[!, "thrice"]) + + mymet2 = compose_metrics( + "ThriceSystemSlackUp", + "Computes SystemSlackUp*3", + (+), + test_calc_system_slack_up, + test_calc_system_slack_up, + test_calc_system_slack_up, + ) + results2 = compute_all( + results_ed, + [test_calc_system_slack_up, mymet2], + nothing, + ["once", "thrice"], + ) + @test all(results2[!, "once"] * 3 .== results2[!, "thrice"]) + + mymet3 = compose_metrics( + "ThriceSumObjectiveValue", + "Computes SumObjectiveValue*3", + (+), + test_calc_sum_objective_value, + test_calc_sum_objective_value, + test_calc_sum_objective_value, + ) + results3 = compute_all( + results_uc, + [test_calc_sum_objective_value, mymet3], + nothing, + ["once", "thrice"], + ) + @test all(results3[!, "once"] * 3 .== results3[!, "thrice"]) + + mymet4 = compose_metrics( + "SlackSlackPower", + "Computes SystemSlackUp^2*ActivePower (element-wise)", + (.*), + test_calc_system_slack_up, + test_calc_active_power, + test_calc_system_slack_up, + ) + results4 = compute_all( + results_ed, + [test_calc_system_slack_up, test_calc_active_power, mymet4], + 
[nothing, myent, myent], + ["slack", "power", "final"], + ) + @test all(results4[!, "slack"] .^ 2 .* results4[!, "power"] .== results4[!, "final"]) +end + +@testset "Test agg_meta basics" begin + @test get_agg_meta(my_df_agg_meta, "Component1") == my_agg_meta[1] + @test get_agg_meta(my_df_agg_meta, "Component2") == my_agg_meta[2] + new_df = copy(my_df2) + set_agg_meta!(new_df, "Component1", my_agg_meta[1]) + set_agg_meta!(new_df, "Component2", my_agg_meta[2]) + @test get_agg_meta(new_df, "Component1") == my_agg_meta[1] + @test get_agg_meta(new_df, "Component2") == my_agg_meta[2] + + newer_df = copy(my_df1) + @test get_agg_meta(newer_df) === nothing + set_agg_meta!(newer_df, my_meta) + @test get_agg_meta(newer_df) == my_meta +end + +@testset "Test component_agg_fn and corresponding `compute` aggregation behavior" begin + my_selector = select_components(ThermalStandard) + my_results = results_uc + sum_metric = test_calc_active_power + @test get_component_agg_fn(sum_metric) == sum # Should be the default + my_mean(x) = sum(x) / length(x) + mean_metric = with_component_agg_fn(sum_metric, my_mean) + @test get_component_agg_fn(mean_metric) == my_mean + # NOTE a more thorough approach would test the getter and "with-er" on all subtypes + + results = compute_all( + my_results, + [sum_metric, mean_metric], + [my_selector, my_selector], + ["sum_col", "mean_col"], + ) + @assert !all(results[!, "sum_col"] .== 0) "Cannot test with all-zero data" + n_components = get_components(my_selector, get_system(my_results)) |> collect |> length + @assert n_components > 1 "Cannot test without multiple components" + + @test all(isapprox.(results[!, "mean_col"] .* n_components, results[!, "sum_col"])) + + results2 = compute_all( + my_results, + repeat([test_calc_dummy_meta], 3), + [thermal_ent, wind_ent, select_components(thermal_ent, wind_ent)], + ["thermal", "wind", "combo"]) + @test isapprox( + data_mat(results2), + [thermal_vals other_vals weighted_mean( + [thermal_vals, other_vals], + 
[thermal_weights, other_weights], + )], + ) + @test get_agg_meta.(Ref(results2), data_cols(results2)) == + [thermal_weights, other_weights, sum([thermal_weights, other_weights])] +end + +@testset "Test time_agg_fn and corresponding `aggregate_time` aggregation behavior" begin + my_selector = select_components(ThermalStandard) + my_results = results_uc + sum_metric = test_calc_active_power + @test get_time_agg_fn(sum_metric) == sum # Should be the default + mean_metric = with_time_agg_fn(sum_metric, mean) + @test get_time_agg_fn(mean_metric) == mean + # NOTE a more thorough approach would test the getter and "with-er" on all subtypes + + results = compute_all( + my_results, + [sum_metric, mean_metric], + [my_selector, my_selector], + ["sum_col", "mean_col"], + ) + results_agg = aggregate_time(results) + @assert !all(results[!, "sum_col"] .== 0) "Cannot test with all-zero data" + n_times = size(results, 1) + @assert n_times > 1 "Cannot test without multiple time periods" + + @test isapprox(first(results_agg[!, "sum_col"]), sum(results[!, "sum_col"])) + @test isapprox(first(results_agg[!, "mean_col"]), mean(results[!, "sum_col"])) + + results2 = compute_all( + my_results, + repeat([test_calc_dummy_meta], 3), + [thermal_ent, wind_ent, select_components(thermal_ent, wind_ent)], + ["thermal", "wind", "combo"]) + results2_agg = aggregate_time(results2) + answer1 = weighted_mean(thermal_vals, thermal_weights) + answer2 = weighted_mean(other_vals, other_weights) + @test results2_agg[!, "thermal"] == [answer1] + @test results2_agg[!, "wind"] == [answer2] + @test results2_agg[!, "combo"] == + [weighted_mean([answer1, answer2], [sum(thermal_weights), sum(other_weights)])] +end diff --git a/test/test_result_sorting.jl b/test/test_result_sorting.jl index e63207d..dbe3080 100644 --- a/test/test_result_sorting.jl +++ b/test/test_result_sorting.jl @@ -1,4 +1,4 @@ -(results_uc, results_ed) = run_test_sim(TEST_RESULT_DIR) +(results_uc, results_ed) = run_test_sim(TEST_RESULT_DIR, 
TEST_SIM_NAME) problem_results = run_test_prob() @testset "test filter results" begin