diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac1e885e..31959ec6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,7 +4,7 @@ on: branches: - master - dev - - for-a-0-point-21-release + - for-a-0-point-22-release - next-breaking-release push: branches: diff --git a/Project.toml b/Project.toml index cfb0d85f..dc7cd027 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJBase" uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" authors = ["Anthony D. Blaom "] -version = "0.21.14" +version = "1.0.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -13,8 +13,8 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -22,13 +22,22 @@ Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" +StatisticalMeasuresBase = "c062fc1d-0d66-479b-b6ac-8b44719de4cc" StatisticalTraits = "64bff920-2084-43da-a3e6-9bb72801c0c9" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +[weakdeps] +StatisticalMeasures = 
"a19d573c-0a75-4610-95b3-7071388c7541" + +[extensions] +DefaultMeasuresExt = "StatisticalMeasures" + [compat] CategoricalArrays = "0.9, 0.10" CategoricalDistributions = "0.1" @@ -36,14 +45,17 @@ ComputationalResources = "0.3" DelimitedFiles = "1" Distributions = "0.25.3" InvertedIndices = "1" -LossFunctions = "0.11" +LearnAPI = "0.1" MLJModelInterface = "1.7" Missings = "0.4, 1" OrderedCollections = "1.1" Parameters = "0.12" PrettyTables = "1, 2" ProgressMeter = "1.7.1" +Reexport = "1.2" ScientificTypes = "3" +StatisticalMeasures = "0.1.1" +StatisticalMeasuresBase = "0.1.1" StatisticalTraits = "3.2" StatsBase = "0.32, 0.33, 0.34" Tables = "0.2, 1.0" @@ -57,8 +69,9 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" [targets] -test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables"] +test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "StatisticalMeasures", "Test", "TypedTables"] diff --git a/README.md b/README.md index 9323a9c2..8e9fc1e5 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,11 @@ repository provides core functionality for MLJ, including: - basic utilities for **manipulating datasets** and for **synthesizing datasets** (src/data) -- a [small interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and `Holdout` (src/resampling.jl) +- a [small + 
interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) + for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and + `Holdout` (src/resampling.jl). Actual performance evaluation measures (aka metrics), which previously + were provided by MLJBase.jl, now live in [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/). - methods for **performance evaluation**, based on those resampling strategies (src/resampling.jl) @@ -44,9 +48,4 @@ repository provides core functionality for MLJ, including: associated methods, for use with [MLJTuning](https://github.com/JuliaAI/MLJTuning.jl) (src/hyperparam) -- a [small - interface](https://alan-turing-institute.github.io/MLJ.jl/dev/performance_measures/#Traits-and-custom-measures-1) - for **performance measures** (losses and scores), implementation of about 60 such measures, including integration of the - [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl) - library (src/measures). To be migrated into separate package in the near future. 
diff --git a/ext/DefaultMeasuresExt.jl b/ext/DefaultMeasuresExt.jl new file mode 100644 index 00000000..a06cd00f --- /dev/null +++ b/ext/DefaultMeasuresExt.jl @@ -0,0 +1,15 @@ +module DefaultMeasuresExt + +using MLJBase +import MLJBase:default_measure, ProbabilisticDetector, DeterministicDetector +using StatisticalMeasures +using StatisticalMeasures.ScientificTypesBase + +default_measure(::Deterministic, ::Type{<:Union{Continuous,Count}}) = l2 +default_measure(::Deterministic, ::Type{<:Finite}) = misclassification_rate +default_measure(::Probabilistic, ::Type{<:Union{Finite,Count}}) = log_loss +default_measure(::Probabilistic, ::Type{<:Continuous}) = log_loss +default_measure(::ProbabilisticDetector, ::Type{<:OrderedFactor{2}}) = area_under_curve +default_measure(::DeterministicDetector, ::Type{<:OrderedFactor{2}}) = balanced_accuracy + +end # module diff --git a/src/MLJBase.jl b/src/MLJBase.jl index 63fe4fd7..f0a19e93 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -3,6 +3,7 @@ module MLJBase # =================================================================== # IMPORTS +using Reexport import Base: ==, precision, getindex, setindex! 
import Base.+, Base.*, Base./ @@ -16,7 +17,7 @@ for trait in StatisticalTraits.TRAITS eval(:(import StatisticalTraits.$trait)) end -import Base.instances # considered a trait for measures +import LearnAPI import StatisticalTraits.snakecase import StatisticalTraits.info @@ -47,7 +48,7 @@ end ################### # Hack Block ends # ################### - +import MLJModelInterface: ProbabilisticDetector, DeterministicDetector import MLJModelInterface: fit, update, update_data, transform, inverse_transform, fitted_params, predict, predict_mode, predict_mean, predict_median, predict_joint, @@ -78,8 +79,6 @@ using ProgressMeter import .Threads # Operations & extensions -import LossFunctions -import LossFunctions.Traits import StatsBase import StatsBase: fit!, mode, countmap import Missings: levels @@ -89,6 +88,9 @@ using CategoricalDistributions import Distributions: pdf, logpdf, sampler const Dist = Distributions +# Measures +import StatisticalMeasuresBase + # from Standard Library: using Statistics, LinearAlgebra, Random, InteractiveUtils @@ -128,57 +130,6 @@ const CatArrMissing{T,N} = ArrMissing{CategoricalValue{T},N} const MMI = MLJModelInterface const FI = MLJModelInterface.FullInterface -const MARGIN_LOSSES = [ - :DWDMarginLoss, - :ExpLoss, - :L1HingeLoss, - :L2HingeLoss, - :L2MarginLoss, - :LogitMarginLoss, - :ModifiedHuberLoss, - :PerceptronLoss, - :SigmoidLoss, - :SmoothedL1HingeLoss, - :ZeroOneLoss -] - -const DISTANCE_LOSSES = [ - :HuberLoss, - :L1EpsilonInsLoss, - :L2EpsilonInsLoss, - :LPDistLoss, - :LogitDistLoss, - :PeriodicLoss, - :QuantileLoss -] - -const WITH_PARAMETERS = [ - :DWDMarginLoss, - :SmoothedL1HingeLoss, - :HuberLoss, - :L1EpsilonInsLoss, - :L2EpsilonInsLoss, - :LPDistLoss, - :QuantileLoss, -] - -const MEASURE_TYPE_ALIASES = [ - :FPR, :FNR, :TPR, :TNR, - :FDR, :PPV, :NPV, :Recall, :Specificity, - :MFPR, :MFNR, :MTPR, :MTNR, - :MFDR, :MPPV, :MNPV, :MulticlassRecall, :MulticlassSpecificity, - :MCR, - :MCC, - :BAC, :BACC, - :RMS, :RMSPV, :RMSL, 
:RMSLP, :RMSP, - :MAV, :MAE, :MAPE, - :RSQ, :LogCosh, - :CrossEntropy, - :AUC -] - -const LOSS_FUNCTIONS = vcat(MARGIN_LOSSES, DISTANCE_LOSSES) - # =================================================================== # Computational Resource # default_resource allows to switch the mode of parallelization @@ -199,19 +150,13 @@ include("models.jl") include("sources.jl") include("machines.jl") -include("composition/deprecated_abstract_types.jl") include("composition/learning_networks/nodes.jl") include("composition/learning_networks/inspection.jl") include("composition/learning_networks/signatures.jl") -include("composition/learning_networks/deprecated_machines.jl") include("composition/learning_networks/replace.jl") -include("composition/models/deprecated_pipelines.jl") -include("composition/models/deprecated_methods.jl") include("composition/models/network_composite_types.jl") include("composition/models/network_composite.jl") -include("composition/models/deprecated_from_network.jl") -include("composition/models/inspection.jl") include("composition/models/pipelines.jl") include("composition/models/transformed_target_model.jl") @@ -225,21 +170,14 @@ include("data/data.jl") include("data/datasets.jl") include("data/datasets_synthetic.jl") -include("measures/measures.jl") -include("measures/measure_search.jl") -include("measures/doc_strings.jl") +include("default_measures.jl") include("composition/models/stacking.jl") -# function on the right-hand side is defined in src/measures/meta_utilities.jl: -const MEASURE_TYPES_ALIASES_AND_INSTANCES = measures_for_export() - const EXTENDED_ABSTRACT_MODEL_TYPES = vcat( MLJBase.MLJModelInterface.ABSTRACT_MODEL_SUBTYPES, MLJBase.NETWORK_COMPOSITE_TYPES, # src/composition/models/network_composite_types.jl - MLJBase.COMPOSITE_TYPES, # src/composition/abstract_types.jl - MLJBase.SURROGATE_TYPES, # src/composition/abstract_types.jl - [:MLJType, :Model, :NetworkComposite, :Surrogate, :Composite], + [:MLJType, :Model, :NetworkComposite], 
) # =================================================================== @@ -337,8 +275,8 @@ export machine, Machine, fit!, report, fit_only!, default_scitype_check_level, # datasets_synthetics.jl export make_blobs, make_moons, make_circles, make_regression -# composition (surrogates and composites are exported in composition): -export machines, sources, @from_network, @pipeline, Stack, +# composition +export machines, sources, Stack, glb, @tuple, node, @node, sources, origins, return!, nrows_at_source, machine, rebind!, nodes, freeze!, thaw!, Node, AbstractNode, Pipeline, @@ -357,23 +295,8 @@ export ResamplingStrategy, Holdout, CV, StratifiedCV, TimeSeriesCV, # ------------------------------------------------------------------- # exports from MLJBase specific to measures -# measure names: -for m in MEASURE_TYPES_ALIASES_AND_INSTANCES - :(export $m) |> eval -end - -# measures/registry.jl: -export measures, metadata_measure - # measure/measures.jl (excluding traits): -export aggregate, default_measure, value, skipinvalid - -# measures/probabilistic: -export roc_curve, roc - -# measures/finite.jl (averaging modes for multiclass scores) -export no_avg, macro_avg, micro_avg - +export default_measure # ------------------------------------------------------------------- # re-export from Random, StatsBase, Statistics, Distributions, @@ -381,4 +304,10 @@ export no_avg, macro_avg, micro_avg export pdf, sampler, mode, median, mean, shuffle!, categorical, shuffle, levels, levels!, std, Not, support, logpdf, LittleDict +# for julia < 1.9 +if !isdefined(Base, :get_extension) + include(joinpath("..","ext", "DefaultMeasuresExt.jl")) + @reexport using .DefaultMeasuresExt.StatisticalMeasures +end + end # module diff --git a/src/composition/deprecated_abstract_types.jl b/src/composition/deprecated_abstract_types.jl deleted file mode 100644 index e71ef88e..00000000 --- a/src/composition/deprecated_abstract_types.jl +++ /dev/null @@ -1,40 +0,0 @@ -## COMPOSITE AND SURRUGOTE MODEL TYPES 
- -# For example, we want to define - -# abstract type ProbabilisticComposite <: Probabilistic end -# struct ProbabilisticSurrogate <: Probabilistic end -# Probabilistic() = ProbablisiticSurrogate() - -# but also want this for all the abstract `Model` subtypes: - -const COMPOSITE_TYPES = Symbol[] -const SURROGATE_TYPES = Symbol[] -const composite_types = Any[] -const surrogate_types = Any[] - -for T in MLJModelInterface.ABSTRACT_MODEL_SUBTYPES - composite_type_name = string(T, "Composite") |> Symbol - surrogate_type_name = string(T, "Surrogate") |> Symbol - - @eval(abstract type $composite_type_name <: $T end) - @eval(struct $surrogate_type_name <: $T end) - - push!(COMPOSITE_TYPES, composite_type_name) - push!(SURROGATE_TYPES, surrogate_type_name) - push!(composite_types, @eval($composite_type_name)) - push!(surrogate_types, @eval($surrogate_type_name)) - - # shorthand surrogate constructor: - @eval($T() = $surrogate_type_name()) -end - - -const Surrogate = Union{surrogate_types...} -const Composite = Union{composite_types...} - -MLJModelInterface.is_wrapper(::Type{<:Union{Composite,Surrogate}}) = true -MLJModelInterface.package_name(::Type{<:Union{Composite,Surrogate}}) = "MLJBase" -for T in surrogate_types - MLJModelInterface.load_path(::Type{T}) = string("MLJBase.", T) -end diff --git a/src/composition/learning_networks/deprecated_machines.jl b/src/composition/learning_networks/deprecated_machines.jl deleted file mode 100644 index 9080d196..00000000 --- a/src/composition/learning_networks/deprecated_machines.jl +++ /dev/null @@ -1,440 +0,0 @@ -# # SIGNATURES - -function _operation_part(signature) - ops = filter(in(OPERATIONS), keys(signature)) - return NamedTuple{ops}(map(op->getproperty(signature, op), ops)) -end -function _report_part(signature) - :report in keys(signature) || return NamedTuple() - return signature.report -end - -_operations(signature) = keys(_operation_part(signature)) - -function _nodes(signature) - return 
(values(_operation_part(signature))..., - values(_report_part(signature))...) -end - -function _call(nt::NamedTuple) - _call(n) = deepcopy(n()) - _keys = keys(nt) - _values = values(nt) - return NamedTuple{_keys}(_call.(_values)) -end - -""" - model_supertype(interface) - -Return, if this can be inferred, which of `Deterministic`, -`Probabilistic` and `Unsupervised` is the appropriate supertype for a -composite model obtained by exporting a learning network with the -specified learning network interface. - -$DOC_NETWORK_INTERFACES - -If a supertype cannot be inferred, `nothing` is returned. - -If the network with given `signature` is not exportable, this method -will not error but it will not a give meaningful return value either. - -**Private method.** - -""" -function model_supertype(signature) - - operations = _operations(signature) - - length(intersect(operations, (:predict_mean, :predict_median))) == 1 && - return Deterministic - - if :predict in operations - node = signature.predict - if node isa Source - return Deterministic - end - if node.machine !== nothing - model = node.machine.model - model isa Deterministic && return Deterministic - model isa Probabilistic && return Probabilistic - end - end - - return nothing - -end - - -# # FITRESULTS FOR COMPOSITE MODELS - -mutable struct CompositeFitresult - signature - glb - network_model_names - function CompositeFitresult(signature) - signature_node = glb(_nodes(signature)...) 
- new(signature, signature_node) - end -end -signature(c::CompositeFitresult) = getfield(c, :signature) -glb(c::CompositeFitresult) = getfield(c, :glb) - -# To accommodate pre-existing design (operations.jl) arrange -# that `fitresult.predict` returns the predict node, etc: -Base.propertynames(c::CompositeFitresult) = keys(signature(c)) -Base.getproperty(c::CompositeFitresult, name::Symbol) = - getproperty(signature(c), name) - - -# # LEARNING NETWORK MACHINES - -surrogate(::Type{<:Deterministic}) = Deterministic() -surrogate(::Type{<:Probabilistic}) = Probabilistic() -surrogate(::Type{<:Unsupervised}) = Unsupervised() -surrogate(::Type{<:Static}) = Static() - -caches_data_by_default(::Type{<:Surrogate}) = false - -const ERR_MUST_PREDICT = ArgumentError( - "You must specify at least `predict=`. ") -const ERR_MUST_TRANSFORM = ArgumentError( - "You must specify at least `transform=`. ") -const ERR_MUST_OPERATE = ArgumentError( - "You must specify at least one operation, as in `predict=`. ") -const ERR_MUST_SPECIFY_SOURCES = ArgumentError( - "You must specify at least one source `Xs`, as in "* - "`machine(surrogate_model, Xs, ...; kwargs...)`. ") -const ERR_BAD_SIGNATURE = ArgumentError( - "Only the following keyword arguments are supported in learning network "* - "machine constructors: `report` or one of: `$OPERATIONS`. ") -const ERR_EXPECTED_NODE_IN_SIGNATURE = ArgumentError( - "Learning network machine constructor syntax error. "* - "Did not enounter `Node` in place one was expected. 
") - -function check_surrogate_machine(::Surrogate, signature, _sources) - isempty(_operations(signature)) && throw(ERR_MUST_OPERATE) - isempty(_sources) && throw(ERR_MUST_SPECIFY_SOURCES) - return nothing -end - -function check_surrogate_machine(::Union{Supervised,SupervisedAnnotator}, - signature, - _sources) - isempty(_operations(signature)) && throw(ERR_MUST_PREDICT) - length(_sources) > 1 || throw(err_supervised_nargs()) - return nothing -end - -function check_surrogate_machine(::Union{Unsupervised}, - signature, - _sources) - isempty(_operations(signature)) && throw(ERR_MUST_TRANSFORM) - length(_sources) < 2 || throw(err_unsupervised_nargs()) - return nothing -end - -const WARN_NETWORK_MACHINES_DEPRECATION = - "Learning network machines are deprecated. For the recommended way of exporting "* - "learning networks as new stand-alone model types, see the \"Learning Networks\" "* - "section of the MLJ manual. " - -function machine(model::Surrogate, _sources::Source...; depwarn=true, pair_itr...) - - depwarn && Base.depwarn(WARN_NETWORK_MACHINES_DEPRECATION, :machine, force=true) - - # named tuple, such as `(predict=yhat, transform=W)`: - signature = (; pair_itr...) - - # signature checks: - isempty(_operations(signature)) && throw(ERR_MUST_OPERATE) - for k in keys(signature) - if k in OPERATIONS - getproperty(signature, k) isa AbstractNode || - throw(ERR_EXPECTED_NODE_IN_SIGNATURE) - elseif k === :report - all(v->v isa AbstractNode, values(signature.report)) || - throw(ERR_EXPECTED_NODE_IN_SIGNATURE) - else - throw(ERR_BAD_SIGNATURE) - end - end - - check_surrogate_machine(model, signature, _sources) - - mach = Machine(model, _sources...) - - mach.fitresult = CompositeFitresult(signature) - - return mach - -end - -function machine(_sources::Source...; depwarn=true, pair_itr...) - - signature = (; pair_itr...) - - T = model_supertype(signature) - if T == nothing - @warn "Unable to infer surrogate model type. \n"* - "Using Deterministic(). 
To override specify "* - "surrogate model, as in "* - "`machine(Probabilistic(), ...)` or `machine(Interval(), ...)`" - model = Deterministic() - else - model = surrogate(T) - end - - return machine(model, _sources...; depwarn, pair_itr...) - -end - -""" - N = glb(mach::Machine{<:Union{Composite,Surrogate}}) - -A greatest lower bound for the nodes appearing in the learning network interface of -`mach`. - -$DOC_NETWORK_INTERFACES - -**Private method.** - -""" -glb(mach::Machine{<:Union{Composite,Surrogate}}) = glb(mach.fitresult) - -""" - report(fitresult::CompositeFitresult) - -Return a tuple combining the report from `fitresult.glb` (a `Node` report) with the -additions coming from nodes declared as report nodes in `fitresult.signature`, but without -merging the two. - -$DOC_NETWORK_INTERFACES - -**Private method** -""" -function report(fitresult::CompositeFitresult) - basic = report(glb(fitresult)) - additions = _call(_report_part(signature(fitresult))) - return (; basic, additions) -end - -""" - fit!(mach::Machine{<:Surrogate}; - rows=nothing, - acceleration=CPU1(), - verbosity=1, - force=false)) - -Train the complete learning network wrapped by the machine `mach`. - -More precisely, if `s` is the learning network signature used to -construct `mach`, then call `fit!(N)`, where `N` is a greatest lower -bound of the nodes appearing in the signature (values in the signature -that are not `AbstractNode` are ignored). For example, if `s = -(predict=yhat, transform=W)`, then call `fit!(glb(yhat, W))`. - -See also [`machine`](@ref) - -""" -function fit!(mach::Machine{<:Surrogate}; kwargs...) - glb = MLJBase.glb(mach) - fit!(glb; kwargs...) 
- mach.state += 1 - mach.report = Dict{Symbol,Any}(:fit => MLJBase.report(mach.fitresult)) - mach.old_model = deepcopy(mach.model) - return mach -end - -MLJModelInterface.fitted_params(mach::Machine{<:Surrogate}) = - fitted_params(glb(mach)) - - -# # CONSTRUCTING THE RETURN VALUE FOR A COMPOSITE FIT METHOD - -logerr_identical_models(name, model) = - "The hyperparameters $name of "* - "$model have identical model "* - "instances as values. " -const ERR_IDENTICAL_MODELS = ArgumentError( - "Two distinct hyper-parameters of a "* - "composite model that are both "* - "associated with models in the underlying learning "* - "network (eg, any two components of a `@pipeline` model) "* - "cannot have identical values, although they can be `==` "* - "(corresponding nested properties are `==`). "* - "Consider constructing instances "* - "separately or use `deepcopy`. ") - -# Identify which properties of `model` have, as values, a model in the -# learning network wrapped by `mach`, and check that no two such -# properties have have identical values (#377). Return the property name -# associated with each model in the network (in the order appearing in -# `models(glb(mach))`) using `nothing` when the model is not -# associated with any property. 
-network_model_names(model::Nothing, mach::Machine{<:Surrogate}) = nothing - -function network_model_names(model::M, mach::Machine{<:Surrogate}) where M<:Model - - network_model_ids = objectid.(MLJBase.models(glb(mach))) - - names = propertynames(model) - - # intialize dict to detect duplicity a la #377: - name_given_id = Dict{UInt64,Vector{Symbol}}() - - # identify location of properties whose values are models in the - # learning network, and build name_given_id: - for name in names - id = objectid(getproperty(model, name)) - if id in network_model_ids - if haskey(name_given_id, id) - push!(name_given_id[id], name) - else - name_given_id[id] = [name,] - end - end - end - - # perform #377 check: - no_duplicates = all(values(name_given_id)) do name - length(name) == 1 - end - if !no_duplicates - for (id, name) in name_given_id - if length(name) > 1 - @error logerr_identical_models(name, model) - end - end - throw(ERR_IDENTICAL_MODELS) - end - - return map(network_model_ids) do id - if id in keys(name_given_id) - return name_given_id[id] |> first - else - return nothing - end - end - -end - -const WARN_RETURN_DEPWARN = - "The use of `return!` is deprecated. For the recommended way of exporting "* - "learning networks as new stand-alone model types, see the \"Learning Networks\" "* - "section of the MLJ manual. " - -""" - - return!(mach::Machine{<:Surrogate}, model, verbosity; acceleration=CPU1()) - -The last call in custom code defining the `MLJBase.fit` method for a -new composite model type. Here `model` is the instance of the new type -appearing in the `MLJBase.fit` signature, while `mach` is a learning -network machine constructed using `model`. Not relevant when defining -composite models using `@pipeline` (deprecated) or `@from_network`. - -For usage, see the example given below. 
Specifically, the call does -the following: - -- Determines which hyper-parameters of `model` point to model - instances in the learning network wrapped by `mach`, for recording - in an object called `cache`, for passing onto the MLJ logic that - handles smart updating (namely, an `MLJBase.update` fallback for - composite models). - -- Calls `fit!(mach, verbosity=verbosity, acceleration=acceleration)`. - -- Records (among other things) a copy of `model` in a variable called `cache` - -- Returns `cache` and outcomes of training in an appropriate form - (specifically, `(mach.fitresult, cache, mach.report)`; see [Adding - Models for General - Use](https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/) - for technical details.) - - -### Example - -The following code defines, "by hand", a new model type `MyComposite` -for composing standardization (whitening) with a deterministic -regressor: - -``` -mutable struct MyComposite <: DeterministicComposite - regressor -end - -function MLJBase.fit(model::MyComposite, verbosity, X, y) - Xs = source(X) - ys = source(y) - - mach1 = machine(Standardizer(), Xs) - Xwhite = transform(mach1, Xs) - - mach2 = machine(model.regressor, Xwhite, ys) - yhat = predict(mach2, Xwhite) - - mach = machine(Deterministic(), Xs, ys; predict=yhat) - return!(mach, model, verbosity) -end -``` - -""" -function return!(mach::Machine{<:Surrogate}, - model::Union{Model,Nothing}, - verbosity; - acceleration=CPU1(), depwarn=true) - - depwarn && Base.depwarn(WARN_RETURN_DEPWARN, :return!, force=true) - - network_model_names_ = network_model_names(model, mach) - - verbosity isa Nothing || fit!(mach, verbosity=verbosity, acceleration=acceleration) - setfield!(mach.fitresult, :network_model_names, network_model_names_) - - # record the current hyper-parameter values: - old_model = deepcopy(model) - - glb = MLJBase.glb(mach) - cache = (; old_model) - - return mach.fitresult, cache, report_given_method(mach)[:fit] -end - - - 
-############################################################################### -##### SAVE AND RESTORE FOR COMPOSITES ##### -############################################################################### - - -# Returns a new `CompositeFitresult` that is a shallow copy of the original one. -function save(model::Composite, fitresult) - interface = MLJBase.signature(fitresult) - newsignature = replace(Signature(interface), serializable=true) |> unwrap - newfitresult = MLJBase.CompositeFitresult(newsignature) - setfield!( - newfitresult, - :network_model_names, - getfield(fitresult, :network_model_names) - ) - return newfitresult -end - - -# Restores a machine of a composite model by restoring all -# submachines contained in it. -function restore!(mach::Machine{<:Composite}) - glb_node = glb(mach) - for submach in machines(glb_node) - restore!(submach) - end - mach.state = 1 - return mach -end - -function report_for_serialization(mach::Machine{<:Composite}) - basic = report(glb(mach.fitresult)) - additions = report_given_method(mach)[:fit].additions - return Dict{Symbol,Any}(:fit => (; basic, additions)) -end diff --git a/src/composition/learning_networks/replace.jl b/src/composition/learning_networks/replace.jl index 3c0cc248..175c4b91 100644 --- a/src/composition/learning_networks/replace.jl +++ b/src/composition/learning_networks/replace.jl @@ -177,29 +177,6 @@ function Base.replace(signature::Signature, pairs::Pair...; node_dict=false, kwa return newsignature, newnode_given_old end -""" - replace(mach, a1=>b1, a2=>b2, ...; options...) - -Return a copy the learning network machine `mach`, and it's underlying learning network, -but replacing any specified sources and models `a1, a2, ...` of the original underlying -network with `b1, b2, ...`. - -$DOC_REPLACE_OPTIONS - -""" -function Base.replace(mach::Machine{<:Surrogate}, pairs::Pair...; kwargs...) 
- signature = MLJBase.signature(mach.fitresult) |> Signature - - newsignature, newnode_given_old = - replace(signature, pairs...; node_dict=true, kwargs...) - - newinterface = unwrap(newsignature) - - newargs = [newnode_given_old[arg] for arg in mach.args] - - return machine(mach.model, newargs...; newinterface...) -end - # Copy the complete learning network having `W` as a greatest lower bound, executing the # specified replacements, and return the dictionary mapping old nodes to new nodes. function _replace( diff --git a/src/composition/learning_networks/signatures.jl b/src/composition/learning_networks/signatures.jl index b224cd47..d49aace9 100644 --- a/src/composition/learning_networks/signatures.jl +++ b/src/composition/learning_networks/signatures.jl @@ -356,7 +356,7 @@ function fitted_params(signature::Signature; supplement=true) end """ - output_and_report(signature, operation, Xnew) + output_and_report(signature, operation, Xnew...) **Private method.** @@ -375,3 +375,6 @@ function output_and_report(signature, operation, Xnew) report = MLJBase.report(signature_clone; supplement=false) return output, report end +# special case for static transformers with multiple inputs: +output_and_report(signature, operation, Xnew...) = + output_and_report(signature, operation, Xnew) diff --git a/src/composition/models/deprecated_from_network.jl b/src/composition/models/deprecated_from_network.jl deleted file mode 100644 index dfe27807..00000000 --- a/src/composition/models/deprecated_from_network.jl +++ /dev/null @@ -1,272 +0,0 @@ -## EXPORTING LEARNING NETWORKS AS MODELS WITH @from_network - -# closure to generate the fit methods for exported composite. Here -# `mach` is a learning network machine. -function fit_method(mach, models...) - - signature = mach.fitresult - mach_args = mach.args - - function _fit(model, verbosity::Integer, args...) 
- length(args) > length(mach_args) && - throw(ArgumentError("$M does not support more than "* - "$(length(mach_args)) training arguments")) - replacement_models = [getproperty(model, fld) - for fld in propertynames(model)] - model_replacements = [models[j] => replacement_models[j] - for j in eachindex(models)] - source_replacements = [mach_args[i] => source(args[i]) - for i in eachindex(args)] - replacements = vcat(model_replacements, source_replacements) - - new_mach = - replace(mach, replacements...; empty_unspecified_sources=true) - - return!(new_mach, model, verbosity; depwarn=false) - end - - return _fit -end - -net_error(message) = throw(ArgumentError("Learning network export error.\n"* - string(message))) -net_error(k::Int) = throw(ArgumentError("Learning network export error $k. ")) - -_insert_subtyping(ex, subtype_ex) = - Expr(:(<:), ex, subtype_ex) - -# create the exported type symbol, e.g. abstract_type(T) == Unsupervised -# would result in :UnsupervisedComposite -_exported_type(T::Model) = Symbol(nameof(abstract_type(T)), :Composite) - -function eval_and_reassign(modl, ex) - s = gensym() - evaluated = modl.eval(ex) - if evaluated isa Symbol - hack = String(evaluated) - modl.eval(:($s = Symbol($hack))) - else - modl.eval(:($s = $evaluated)) - end - return s, evaluated -end - -function without_line_numbers(block_ex) - block_ex.head == :block || throw(ArgumentError) - args = filter(block_ex.args) do arg - !(arg isa LineNumberNode) - end - return Expr(:block, args...) -end - -function from_network_preprocess(modl, mach_ex, block_ex) - - mach_ex, mach = eval_and_reassign(modl, mach_ex) - mach isa Machine{<:Surrogate} || - net_error("$mach is not a learning network machine. 
") - if block_ex.head == :block - block_ex = without_line_numbers(block_ex) - struct_ex = block_ex.args[1] - trait_declaration_exs = block_ex.args[2:end] - elseif block_ex.head == :struct - struct_ex = block_ex - trait_declaration_exs = [] - else - net_error("Expected `struct`, `mutable struct` or "* - "`begin ... end` block, but got `$block_ex` ") - end - - # if necessary add or modify struct subtyping: - if struct_ex.args[2] isa Symbol - struct_ex.args[2] = _insert_subtyping(struct_ex.args[2], - _exported_type(mach.model)) - modeltype_ex = struct_ex.args[2].args[1] - elseif struct_ex.args[2] isa Expr - struct_ex.args[2].head == :(<:) || - net_error("Badly formed `struct` subtying. ") - modeltype_ex = struct_ex.args[2].args[1] - super = eval(struct_ex.args[2].args[2]) - inferred_super_ex = _exported_type(mach.model) - if !(super <: Composite) - @warn "New composite type must subtype `Composite` but "* - "`$super` does not. Instead declaring "* - "`$modeltype_ex <: $inferred_super_ex`. " - struct_ex.args[2].args[2] = inferred_super_ex - end - else - net_error(41) - end - - # test if there are no fields: - field_exs = without_line_numbers(struct_ex.args[3]).args - no_fields = isempty(field_exs) - - # extract trait definitions: - trait_ex_given_name_ex = Dict{Symbol,Any}() - - ne() = net_error("Bad trait declaration. ") - for ex in trait_declaration_exs - ex isa Expr || ne() - ex.head == :(=) || ne() - ex.args[1] isa Symbol || ne() - ex.args[1] in MLJModelInterface.MODEL_TRAITS || - net_error("Expected a model trait as keywork but "* - "got $(ex.args[2]). Options are:\n"* - "$MLJModelInterface.MODEL_TRAIES. 
") - length(ex.args) == 2 || ne() - trait_ex_given_name_ex[ex.args[1]] = ex.args[2] - end - - return mach_ex, modeltype_ex, struct_ex, no_fields, trait_ex_given_name_ex - -end - -function from_network_(modl, - mach_ex, - modeltype_ex, - struct_ex, - no_fields, - trait_ex_given_name_ex) - - args = gensym(:args) - models = gensym(:models) - instance = gensym(:instance) - - # Define the new model type with keyword constructor: - if no_fields - modl.eval(struct_ex) - else - modl.eval(MLJBase.Parameters.with_kw(struct_ex, modl, false)) - end - - # Test that an instance can be created: - try - modl.eval(:($modeltype_ex())) - catch e - @error "Problem instantiating a default instance of the "* - "new composite type. Each field name in the struct expression "* - "must have a corresponding model instance (that also appears "* - "somewhere in the network). "* - "Perhaps you forgot to specify one of these?" - throw(e) - end - - # code defining fit method: - program1 = quote - - $(isdefined(modl, :MLJ) ? :(import MLJ.MLJBase) : :(import MLJBase)) - $(isdefined(modl, :MLJ) ? :(import MLJ.MLJBase.MLJModelInterface) : - :(import MLJBase.MLJModelInterface)) - - $instance = $modeltype_ex() - $models = [getproperty($instance, name) - for name in fieldnames($modeltype_ex)] - - MLJModelInterface.fit(model::$modeltype_ex, verb::Integer, $args...) = - MLJBase.fit_method($mach_ex, $models...)(model, verb, $args...) - - end - - modl.eval(program1) - - # define composite model traits: - for (name_ex, value_ex) in trait_ex_given_name_ex - program = quote - MLJBase.$name_ex(::Type{<:$modeltype_ex}) = $value_ex - end - modl.eval(program) - end - - return nothing - -end - -const WARN_FROM_NETWORK_DEPRECATION = - "The `@from_network` macro is deprecated. See the \"Learning Networks\" section "* - "of the MLJ manual for recommended way to export learning networks as new "* - "composite model types. " - -""" - - @from_network mach [mutable] struct NewCompositeModel - ... 
- end - -or - - @from_network mach begin - [mutable] struct NewCompositeModel - ... - end - - end - -Create a new stand-alone model type called `NewCompositeModel`, using -the specified learning network machine `mach` as a blueprint. - -For more on learning network machines, see [`machine`](@ref). - - -### Example - -Consider the following simple learning network for training a decision -tree after one-hot encoding the inputs, and forcing the predictions to -be point-predictions (rather than probabilistic): - -```julia -Xs = source() -ys = source() - -hot = OneHotEncoder() -tree = DecisionTreeClassifier() - -W = transform(machine(hot, Xs), Xs) -yhat = predict_mode(machine(tree, W, ys), W) -``` - -A learning network machine is defined by - -```julia -mach = machine(Deterministic(), Xs, ys; predict=yhat) -``` - -To specify a new `Deterministic` composite model type `WrappedTree` we -specify the model instances appearing in the network as "default" -values in the following decorated struct definition: - -```julia -@from_network mach struct WrappedTree - encoder=hot - decision_tree=tree -end -``` -and create a new instance with `WrappedTree()`. - -To allow the second model component to be replaced by any other -probabilistic model we instead make a mutable struct declaration and, -if desired, annotate types appropriately. In the following code -illustration some model trait declarations have also been added: - -```julia -@from_network mach begin - mutable struct WrappedTree - encoder::OneHotEncoder=hot - classifier::Probabilistic=tree - end - input_scitype = Table(Continuous, Finite) - is_pure_julia = true -end -``` - -""" -macro nodepwarn_from_network(exs...) - args = from_network_preprocess(__module__, exs...) - modeltype_ex = args[2] - from_network_(__module__, args...) -end -macro from_network(exs...) - Base.depwarn(WARN_FROM_NETWORK_DEPRECATION, :from_network, force=true) - args = from_network_preprocess(__module__, exs...) 
- modeltype_ex = args[2] - from_network_(__module__, args...) -end diff --git a/src/composition/models/deprecated_methods.jl b/src/composition/models/deprecated_methods.jl deleted file mode 100644 index 38b48e69..00000000 --- a/src/composition/models/deprecated_methods.jl +++ /dev/null @@ -1,74 +0,0 @@ -## FALL-BACK METHODS FOR COMPOSITE MODELS (EXPORTED LEARNING NETWORKS) - -# *Note.* Be sure to read Note 4 in src/operations.jl to see see how -# fallbacks are provided for operations acting on Composite models. - -caches_data_by_default(::Type{<:Composite}) = true - -# builds on `fitted_params(::CompositeFitresult)` defined in -# composition/learning_networks/machines.jl: -fitted_params(::Union{Composite,Surrogate}, fitresult::CompositeFitresult) = - fitted_params(glb(fitresult)) - -function update(model::M, - verbosity::Integer, - fitresult::CompositeFitresult, - cache, - args...) where M <: Composite - - # This method falls back to `fit` to force rebuilding of the - # underlying learning network if, since the last fit: - # - # (i) Any hyper-parameter of `model` that has, as a value, a model in the network, has - # been replaced with a new value (and not merely mutated), OR - - # (ii) Any OTHER hyper-parameter has changed it's value (in the sense - # of `==`). - - # Otherwise, a "smart" fit is carried out by calling `fit!` on a - # greatest lower bound node for nodes in the signature of the - # underlying learning network machine. - - network_model_names = getfield(fitresult, :network_model_names) - - old_model = cache.old_model - glb = MLJBase.glb(fitresult) # greatest lower bound of network, a node - - if fallback(model, old_model, network_model_names, glb) - return fit(model, verbosity, args...) 
- end - - fit!(glb; verbosity=verbosity) - - # Retrieve additional report values - report = MLJBase.report(fitresult) - - # record current model state: - cache = (; old_model = deepcopy(model)) - - return (fitresult, - cache, - report) - -end - -# helper for preceding method (where logic is explained): -function fallback(model::M, old_model, network_model_names, glb_node) where M - # check the hyper-parameters corresponding to models: - network_models = MLJBase.models(glb_node) - for j in eachindex(network_models) - name = network_model_names[j] - name === nothing || - objectid(network_models[j])===objectid(getproperty(model, name)) || - return true - end - # check any other hyper-parameter: - for name in propertynames(model) - if !(name in network_model_names) - old_value = getproperty(old_model, name) - value = getproperty(model, name) - value == old_value || return true - end - end - return false -end diff --git a/src/composition/models/deprecated_pipelines.jl b/src/composition/models/deprecated_pipelines.jl deleted file mode 100644 index fbebd897..00000000 --- a/src/composition/models/deprecated_pipelines.jl +++ /dev/null @@ -1,11 +0,0 @@ -const ERR_PIPELINE = ErrorException( - "The `@pipeline` macro is deprecated. For pipelines without "* - "target transformations use pipe syntax, as in "* - "`ContinuousEncoder() |> Standardizer() |> my_classifier`. "* - "For details and advanced optioins, query the `Pipeline` docstring. "* - "To wrap a supervised model in a target transformation, use "* - "`TransformedTargetModel`, as in "* - "`TransformedTargetModel(my_regressor, target=Standardizer())`" -) - -macro pipeline(ex...) throw(ERR_PIPELINE) end diff --git a/src/composition/models/inspection.jl b/src/composition/models/inspection.jl deleted file mode 100644 index ece70326..00000000 --- a/src/composition/models/inspection.jl +++ /dev/null @@ -1,44 +0,0 @@ -## USER FRIENDLY INSPECTION OF COMPOSITE MACHINES - -try_scalarize(v) = length(v) == 1 ? 
v[1] : v - -function machines_given_model_name(mach::Machine{M}) where M<:Composite - network_model_names = getfield(mach.fitresult, :network_model_names) - names = unique(filter(name->!(name === nothing), network_model_names)) - glb = MLJBase.glb(mach) - network_models = MLJBase.models(glb) - network_machines = MLJBase.machines(glb) - ret = LittleDict{Symbol,Any}() - for name in names - mask = map(==(name), network_model_names) - _models = network_models[mask] - _machines = filter(mach->mach.model in _models, network_machines) - ret[name] = _machines - end - return ret -end - -function tuple_keyed_on_model_names(machines, mach, f) - dict = MLJBase.machines_given_model_name(mach) - names = tuple(keys(dict)...) - named_tuple_values = map(names) do name - [f(m) for m in dict[name]] |> try_scalarize - end - return NamedTuple{names}(named_tuple_values) -end - -function report(mach::Machine{<:Union{Composite,Surrogate}}) - report_additions = MLJBase.report_given_method(mach)[:fit].additions - report_basic = MLJBase.report_given_method(mach)[:fit].basic - report_components = mach isa Machine{<:Surrogate} ? 
NamedTuple() : - MLJBase.tuple_keyed_on_model_names(report_basic.machines, mach, MLJBase.report) - return merge(report_components, report_basic, report_additions) -end - -function fitted_params(mach::Machine{<:Composite}) - fp_basic = fitted_params(mach.model, mach.fitresult) - machines = fp_basic.machines - fp_components = - MLJBase.tuple_keyed_on_model_names(machines, mach, MLJBase.fitted_params) - return merge(fp_components, fp_basic) -end diff --git a/src/composition/models/pipelines.jl b/src/composition/models/pipelines.jl index 58da17c5..88e7c592 100644 --- a/src/composition/models/pipelines.jl +++ b/src/composition/models/pipelines.jl @@ -138,13 +138,7 @@ const ERR_MIXED_PIPELINE_SPEC = ArgumentError( "Either specify all pipeline components without names, as in "* "`Pipeline(model1, model2)` or specify names for all "* "components, as in `Pipeline(myfirstmodel=model1, mysecondmodel=model2)`. ") -const ERR_USING_TARGET_KWARG = ArgumentError( - "You are not permitted to name a pipeline component \"target\", "* - "as this may be confused with the `target` keyword argument for "* - "the older `@pipeline` macro. `Pipeline` does not support target "* - "transformations. To implement one, wrap a supervised "* - "`model` using `TransformedTargetModel`, as in "* - "`TransformedTargetModel(model, transformer=Standardizer())`. ") + # The following combines its arguments into a named tuple, performing # a number of checks and modifications. 
Specifically, it checks @@ -277,7 +271,6 @@ function Pipeline(args...; prediction_type=nothing, # construct the named tuple of components: if isempty(args) _names = keys(kwargs) - :target in _names && throw(ERR_USING_TARGET_KWARG) _components = values(values(kwargs)) else _names = Symbol[] @@ -586,13 +579,6 @@ function supervised_component(pipe::SupervisedPipeline) return getproperty(named_components, name) end -model_type(::Machine{M}) where M = M -function supervised(machines) - model_types = model_type.(machines) - idx = findfirst(M -> M <: Supervised, model_types) - return machines[idx] -end - # ## Traits diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl index 4a760e24..ec872c16 100644 --- a/src/composition/models/stacking.jl +++ b/src/composition/models/stacking.jl @@ -378,14 +378,23 @@ model_2, ...), ...) function internal_stack_report( stack::Stack{modelnames,}, verbosity::Int, - tt_pairs, + tt_pairs, # train_test_pairs folds_evaluations... ) where modelnames n_measures = length(stack.measures) nfolds = length(tt_pairs) - # For each model we record the results mimicking the fields PerformanceEvaluation + test_fold_sizes = map(tt_pairs) do train_test_pair + test = last(train_test_pair) + length(test) + end + + # weights to be used to aggregate per-fold measurements (averaging to 1): + fold_weights(mode) = nfolds .* test_fold_sizes ./ sum(test_fold_sizes) + fold_weights(::StatisticalMeasuresBase.Sum) = nothing + + # For each model we record the results mimicking the fields of PerformanceEvaluation results = NamedTuple{modelnames}( [( model = model, @@ -393,7 +402,7 @@ function internal_stack_report( measurement = Vector{Any}(undef, n_measures), operation = _actual_operations(nothing, stack.measures, model, verbosity), per_fold = [Vector{Any}(undef, nfolds) for _ in 1:n_measures], - per_observation = Vector{Union{Missing, Vector{Any}}}(missing, n_measures), + per_observation = [Vector{Vector{Any}}(undef, nfolds) for _ in 
1:n_measures], fitted_params_per_fold = [], report_per_fold = [], train_test_pairs = tt_pairs, @@ -419,30 +428,29 @@ function internal_stack_report( model_results.operation, )) ypred = operation(mach, Xtest) - loss = measure(ypred, ytest) - # Update per_observation - if reports_each_observation(measure) - if model_results.per_observation[i] === missing - model_results.per_observation[i] = Vector{Any}(undef, nfolds) - end - model_results.per_observation[i][foldid] = loss - end + measurements = StatisticalMeasuresBase.measurements(measure, ypred, ytest) + + # Update per observation: + model_results.per_observation[i][foldid] = measurements # Update per_fold - model_results.per_fold[i][foldid] = - reports_each_observation(measure) ? - MLJBase.aggregate(loss, measure) : loss + model_results.per_fold[i][foldid] = measure(ypred, ytest) end index += 1 end end - # Update measurement field by aggregation + # Update measurement field by aggregating per-fold measurements for modelname in modelnames for (i, measure) in enumerate(stack.measures) model_results = results[modelname] + mode = StatisticalMeasuresBase.external_aggregation_mode(measure) model_results.measurement[i] = - MLJBase.aggregate(model_results.per_fold[i], measure) + StatisticalMeasuresBase.aggregate( + model_results.per_fold[i]; + mode, + weights=fold_weights(mode), + ) end end diff --git a/src/composition/models/transformed_target_model.jl b/src/composition/models/transformed_target_model.jl index 02fca5eb..7b72419a 100644 --- a/src/composition/models/transformed_target_model.jl +++ b/src/composition/models/transformed_target_model.jl @@ -140,14 +140,11 @@ tmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.( function TransformedTargetModel( args...; model=nothing, - target=nothing, # to be deprecated - transformer=target, # then this should be `nothing` + transformer=nothing, inverse=nothing, cache=true, ) - isnothing(target) || - Base.depwarn(WARN_TARGET_DEPRECATED, 
:TransformedTargetModel, force=true) length(args) < 2 || throw(ERR_TOO_MANY_ARGUMENTS) if length(args) === 1 diff --git a/src/default_measures.jl b/src/default_measures.jl new file mode 100644 index 00000000..2488bbf5 --- /dev/null +++ b/src/default_measures.jl @@ -0,0 +1,23 @@ +# # DEFAULT MEASURES + +""" + default_measure(model) + +Return a measure that should work with `model`, or return `nothing` if none can be +reliably inferred. + +For Julia 1.9 and higher, `nothing` is returned, unless StatisticalMeasures.jl is +loaded. + +# New implementations + +This method dispatches `default_measure(model, observation_scitype)`, which has +`nothing` as the fallback return value. Extend `default_measure` by overloading this +version of the method. See for example the MLJBase.jl package extension, +DefaultMeausuresExt.jl. + +""" +default_measure(m) = nothing +default_measure(m::Union{Supervised,Annotator}) = + default_measure(m, nonmissingtype(guess_model_target_observation_scitype(m))) +default_measure(m, S) = nothing diff --git a/src/machines.jl b/src/machines.jl index 50544212..b6fabca6 100644 --- a/src/machines.jl +++ b/src/machines.jl @@ -682,7 +682,6 @@ function fit_only!( @error "Problem fitting the machine $mach. " _sources = sources(glb(mach.args...)) length(_sources) > 2 || - model isa Composite || all((!isempty).(_sources)) || @warn "Some learning network source nodes are empty. " @info "Running type checks... " diff --git a/src/measures/README.md b/src/measures/README.md deleted file mode 100644 index 0097d2f7..00000000 --- a/src/measures/README.md +++ /dev/null @@ -1,117 +0,0 @@ -## Adding new measures - -This document assumes familiarity with the traits provided for -measures. For a summary, query the docstring for -`MLJBase.metadata_measures`. 
- -A measure is ordinarily called on data directly, as in - -```julia -ŷ = rand(3) # predictions -y = rand(3) # ground truth observations - -m = LPLoss(p=3) - -julia> m(ŷ, y) -3-element Vector{Float64}: - 0.07060087052171798 - 0.003020044780949528 - 0.019067038457889922 -``` - -To call a measure without performing dimension or pool checks, one -uses `MLJBase.call` instead: - -```julia -MLJBase.call(m, ŷ, y) -``` - -A new measure reporting an aggregate measurement, such as -`AreaUnderCurve`, will subtype `Aggregate`, and only needs to -implement `call`. A measure that reports a measurement for each -observation , such as `LPLoss`, subtypes `Unaggregated` and only needs -to implement an evaluation method for single observations called -`single`. - -Recall also that if a measure reports each observation, it does so -even in the case that weights are additionally specified: - -```julia -w = rand(3) # per-observation weights - -julia> m(ŷ, y, rand(3)) -3-element Vector{Float64}: - 0.049333392516241206 - 0.0017612002314472718 - 0.003157450446692638 - ``` - -This behaviour differs from other places where weights can only be -specified as part of an aggregation of multi-observation measurements. - - -### Unaggregated measures implement `single` - -To implement an `Unaggregated` measure, it suffices to implement `single(measure, η̂, η)`, -which should return a measurement (e.g., a float) for a single example `(η̂, η)` (e.g., a -pair of floats). There is no need for `single` to handle `missing` values. (Internally, a -wrapper function `robust_single` handles these.) - -If only `single` is implemented, then the measure will automatically -support per-observation weights and, where that makes sense, per-class -weights. However, `supports_class_weights` may need to be overloaded, -as this defaults to `false`. 
- -#### Special cases - -If `single` is *not* implemented, then `call(measure, ŷ, y)`, and optionally -`call(measure, ŷ, y, w)`, must be implemented (the fallbacks call `robust_single`, a -wrapped version of `single` that handles `missing` values). In this case `y` and `ŷ` are -arrays of matching size and the method should return an array of that size *without -performing size or pool checks*. The method should handle `missing` and `NaN` values if -possible, which should be propagated to relevant elements of the returned array. - -The `supports_weights` trait, which defaults to `true`, will need to -be overloaded to return `false` if neither `single(::MyMeasure, -args...)` nor `call(::MyMeasure, ŷ, y, w::AbstractArray)` are -overloaded. - -### Aggregated measures implement `call` - -To implement an `Aggregated` measure, implement -`call(measure::MyMeasure, ŷ, y)`. Optionally implement -`call(measure::MyMeasure, ŷ, y, w)`. - - -### Trait declarations - -Measure traits can be set using the `metadata_measure` -function (query the doc-string) or individually, as in - -```julia -supports_weights(::Type{<:MyMeasure}) = false -``` - -Defaults are shown below - -trait | allowed values | default --------------------------|------------------------------|-------------- -`target_scitype` | some scientific type | `Unknown` -`human_name` | any `String` | string version of type name -`instances` | any `Vector{String}` | empty -`prediction_type` | `:deterministic`, `:probabilistic`, `:interval` `:unknown` | `:unknown` -`orientation` | `:score`, `:loss`, `:unknown`| `:unknown` -`aggregation` | `Mean()`, `Sum()`, `RootMeanSqaure()` | `Mean()` -`supports_weights` | `true` or `false` | `true` -`supports_class_weights` | `true` or `false` | `false` -`docstring` | any `String` | includes `name`, `human_name` and `instances` -`distribution_type` | any `Distribution` subtype or `Unknown` | `Unknown` - -### Exporting the measure and its aliases - -If you create a type alias, as in `const 
MAE = MeanAbsoluteValue`, -then you must add this alias to the constant -`MEASURE_TYPE_ALIASES`. That is the only step needed, as the the macro -`@export_measures` programmatically exports all measure types and -their instances, and those aliases listed in = MeanAbsoluteValue`, -then you must add this alias to the constant `MEASURE_TYPE_ALIASES`. diff --git a/src/measures/confusion_matrix.jl b/src/measures/confusion_matrix.jl deleted file mode 100644 index fd35dd26..00000000 --- a/src/measures/confusion_matrix.jl +++ /dev/null @@ -1,273 +0,0 @@ -## CONFUSION MATRIX OBJECT - -""" - ConfusionMatrixObject{C} - -Confusion matrix with `C ≥ 2` classes. Rows correspond to predicted values -and columns to the ground truth. -""" -struct ConfusionMatrixObject{C} - mat::Matrix - labels::Vector{String} -end - -""" - ConfusionMatrixObject(m, labels) - -Instantiates a confusion matrix out of a square integer matrix `m`. -Rows are the predicted class, columns the ground truth. See also the -[wikipedia article](https://en.wikipedia.org/wiki/Confusion_matrix). - -""" -function ConfusionMatrixObject(m::Matrix{Int}, labels::Vector{String}) - s = size(m) - s[1] == s[2] || throw(ArgumentError("Expected a square matrix.")) - s[1] > 1 || throw(ArgumentError("Expected a matrix of size ≥ 2x2.")) - length(labels) == s[1] || - throw(ArgumentError("As many labels as classes must be provided.")) - ConfusionMatrixObject{s[1]}(m, labels) -end - -# allow to access cm[i,j] but not set (it's immutable) -Base.getindex(cm::ConfusionMatrixObject, inds...) = getindex(cm.mat, inds...) 
- -_levels(y1, y2) = vcat(levels(y1), levels(y2)) |> unique - -# simultaneous coercion of two vectors into categorical vectors having -# the same pool: -function _categorical(y1, y2) - L = _levels(y1, y2) - return categorical(y1, levels=L), categorical(y2, levels=L) -end -_categorical(y1::CategoricalArray{V1,N}, - y2::CategoricalArray{V2,N}) where - {V, V1<:Union{Missing,V}, V2<:Union{Missing,V}, N} = - y1, y2 -_categorical(y1::AbstractArray{<:CategoricalArrays.CategoricalValue}, - y2::AbstractArray{<:CategoricalArrays.CategoricalValue}) = - broadcast(identity, y1), broadcast(identity, y2) - - -""" - _confmat(ŷ, y; rev=false) - -A private method. General users should use `confmat` or other instances -of the measure type [`ConfusionMatrix`](@ref). - -Computes the confusion matrix given a predicted `ŷ` with categorical elements -and the actual `y`. Rows are the predicted class, columns the ground truth. -The ordering follows that of `levels(y)`. - -## Keywords - -* `rev=false`: in the binary case, this keyword allows to swap the ordering of - classes. -* `perm=[]`: in the general case, this keyword allows to specify a permutation - re-ordering the classes. -* `warn=true`: whether to show a warning in case `y` does not have scientific - type `OrderedFactor{2}` (see note below). - -## Note - -To decrease the risk of unexpected errors, if `y` does not have -scientific type `OrderedFactor{2}` (and so does not have a "natural -ordering" negative-positive), a warning is shown indicating the -current order unless the user explicitly specifies either `rev` or -`perm` in which case it's assumed the user is aware of the class -ordering. - -The `confusion_matrix` is a measure (although neither a score nor a -loss) and so may be specified as such in calls to `evaluate`, -`evaluate!`, although not in `TunedModel`s. In this case, however, -there no way to specify an ordering different from `levels(y)`, where -`y` is the target. 
- -""" -function _confmat(ŷraw::Union{Arr{V1,N}, CategoricalArray{V1,N}}, - yraw::Union{Arr{V2,N}, CategoricalArray{V2,N}}; - rev::Union{Nothing,Bool}=nothing, - perm::Union{Nothing,Vector{<:Integer}}=nothing, - warn::Bool=true) where - {V,V1<:Union{Missing,V}, V2<:Union{Missing,V},N} - - # no-op if vectors already categorical arrays: - ŷ, y = _categorical(ŷraw, yraw) - - levels_ = levels(y) - nc = length(levels_) - if rev !== nothing && rev && nc > 2 - throw(ArgumentError("Keyword `rev` can only be used in binary case.")) - end - if perm !== nothing && !isempty(perm) - length(perm) == nc || - throw(ArgumentError("`perm` must be of length matching the "* - "number of classes.")) - Set(perm) == Set(collect(1:nc)) || - throw(ArgumentError("`perm` must specify a valid permutation of "* - "`[1, 2, ..., c]`, where `c` is "* - "number of classes.")) - end - - # warning - if rev === nothing && perm === nothing - S = nonmissingtype(elscitype(y)) - if warn - if nc==2 && !(S <: OrderedFactor) - @warn "The classes are un-ordered,\n" * - "using: negative='$(levels_[1])' "* - "and positive='$(levels_[2])'.\n" * - "To suppress this warning, consider coercing "* - "to OrderedFactor." - elseif !(S <: OrderedFactor) - @warn "The classes are un-ordered,\n" * - "using order: $([l for l in levels_]).\n" * - "To suppress this warning, consider "* - "coercing to OrderedFactor." 
- end - end - rev = false - perm = Int[] - elseif rev !== nothing && nc == 2 - # rev takes precedence in binary case - if rev - perm = [2, 1] - else - perm = Int[] - end - end - - # No permutation - if isempty(perm) - cmat = zeros(Int, nc, nc) - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - cmat[int(ŷ[i]), int(y[i])] += 1 - end - return ConfusionMatrixObject(cmat, string.(levels_)) - end - - # With permutation - cmat = zeros(Int, nc, nc) - iperm = invperm(perm) - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - cmat[iperm[int(ŷ[i])], iperm[int(y[i])]] += 1 - end - return ConfusionMatrixObject(cmat, string.(levels_[perm])) -end - - -# Machinery to display the confusion matrix in a non-confusing way -# (provided the REPL is wide enough) - -splitw(w::Int) = (sp1 = div(w, 2); sp2 = w - sp1; (sp1, sp2)) - -function Base.show(stream::IO, m::MIME"text/plain", cm::ConfusionMatrixObject{C} - ) where C - width = displaysize(stream)[2] - mincw = ceil(Int, 12/C) - cw = max(length(string(maximum(cm.mat))),maximum(length.(cm.labels)),mincw) - firstcw = max(length(string(maximum(cm.mat))),maximum(length.(cm.labels)),9) - textlim = 9 - totalwidth = firstcw + cw * C + C + 2 - width < totalwidth && (show(stream, m, cm.mat); return) - - iob = IOBuffer() - wline = s -> write(iob, s * "\n") - splitcw = s -> (w = cw - length(s); splitw(w)) - splitfirstcw = s -> (w = firstcw - length(s); splitw(w)) - cropw = s -> length(s) > textlim ? 
s[1:prevind(s, textlim)] * "…" : s - - # 1.a top box - " "^(firstcw+1) * "┌" * "─"^((cw + 1) * C - 1) * "┐" |> wline - gt = "Ground Truth" - w = (cw + 1) * C - 1 - length(gt) - sp1, sp2 = splitw(w) - " "^(firstcw+1) * "│" * " "^sp1 * gt * " "^sp2 * "│" |> wline - # 1.b separator - "┌" * "─"^firstcw * "┼" * ("─"^cw * "┬")^(C-1) * "─"^cw * "┤" |> wline - # 2.a description line - pr = "Predicted" - sp1, sp2 = splitfirstcw(pr) - partial = "│" * " "^sp1 * pr * " "^sp2 * "│" - for c in 1:C - # max = 10 - s = cm.labels[c] |> cropw - sp1, sp2 = splitcw(s) - partial *= " "^sp1 * s * " "^sp2 * "│" - end - partial |> wline - # 2.b separating line - "├" * "─"^firstcw * "┼" * ("─"^cw * "┼")^(C-1) * ("─"^cw * "┤") |> wline - # 2.c line by line - for c in 1:C - # line - s = cm.labels[c] |> cropw - sp1, sp2 = splitfirstcw(s) - partial = "│" * " "^sp1 * s * " "^sp2 * "│" - for r in 1:C - e = string(cm[c, r]) - sp1, sp2 = splitcw(e) - partial *= " "^sp1 * e * " "^sp2 * "│" - end - partial |> wline - # separator - if c < C - "├" * "─"^firstcw * "┼" * ("─"^cw * "┼")^(C-1) * ("─"^cw * "┤") |> wline - end - end - # 2.d final line - "└" * "─"^firstcw * "┴" * ("─"^cw * "┴")^(C-1) * ("─"^cw * "┘") |> wline - write(stream, take!(iob)) -end - - -## CONFUSION MATRIX AS MEASURE - -struct ConfusionMatrix <: Aggregated - perm::Union{Nothing,Vector{<:Integer}} -end - -ConfusionMatrix(; perm=nothing) = ConfusionMatrix(perm) - -is_measure(::ConfusionMatrix) = true -is_measure_type(::Type{ConfusionMatrix}) = true -human_name(::Type{<:ConfusionMatrix}) = "confusion matrix" -target_scitype(::Type{ConfusionMatrix}) = - Union{AbstractVector{<:Union{Missing,OrderedFactor}}, - AbstractVector{<:Union{Missing,OrderedFactor}}} -supports_weights(::Type{ConfusionMatrix}) = false -prediction_type(::Type{ConfusionMatrix}) = :deterministic -instances(::Type{<:ConfusionMatrix}) = ["confusion_matrix", "confmat"] -orientation(::Type{ConfusionMatrix}) = :other -reports_each_observation(::Type{ConfusionMatrix}) = 
false -is_feature_dependent(::Type{ConfusionMatrix}) = false -aggregation(::Type{ConfusionMatrix}) = Sum() - -@create_aliases ConfusionMatrix - -@create_docs(ConfusionMatrix, -body= -""" -If `r` is the return value, then the raw confusion matrix is `r.mat`, -whose rows correspond to predictions, and columns to ground truth. -The ordering follows that of `levels(y)`. - -Use `ConfusionMatrix(perm=[2, 1])` to reverse the class order for binary -data. For more than two classes, specify an appropriate permutation, as in -`ConfusionMatrix(perm=[2, 3, 1])`. - -""", -scitype=DOC_ORDERED_FACTOR_BINARY) - -# calling behaviour: -call(m::ConfusionMatrix, ŷ, y) = _confmat(ŷ, y, perm=m.perm) - -# overloading addition to make aggregation work: -Base.round(m::MLJBase.ConfusionMatrixObject; kws...) = m -function Base.:+(m1::ConfusionMatrixObject, m2::ConfusionMatrixObject) - if m1.labels != m2.labels - throw(ArgumentError("Confusion matrix labels must agree")) - end - ConfusionMatrixObject(m1.mat + m2.mat, m1.labels) -end diff --git a/src/measures/continuous.jl b/src/measures/continuous.jl deleted file mode 100644 index 33670216..00000000 --- a/src/measures/continuous.jl +++ /dev/null @@ -1,315 +0,0 @@ -const InfiniteArrMissing = Union{ - AbstractArray{<:Union{Missing,Continuous}}, - AbstractArray{<:Union{Missing,Count}}} - -# ----------------------------------------------------------- -# MeanAbsoluteError - -struct MeanAbsoluteError <: Aggregated end - -metadata_measure(MeanAbsoluteError; - instances = ["mae", "mav", "mean_absolute_error", - "mean_absolute_value"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss), - -const MAE = MeanAbsoluteError -const MAV = MeanAbsoluteError -@create_aliases MeanAbsoluteError - -@create_docs(MeanAbsoluteError, -body= -""" -``\\text{mean absolute error} = n^{-1}∑ᵢ|yᵢ-ŷᵢ|`` or -``\\text{mean absolute error} = n^{-1}∑ᵢwᵢ|yᵢ-ŷᵢ|`` -""", -scitype=DOC_INFINITE) - -call(::MeanAbsoluteError, ŷ, y) = 
abs.(ŷ .- y) |> skipinvalid |> mean -call(::MeanAbsoluteError, ŷ, y, w) = abs.(ŷ .- y) .* w |> skipinvalid |> mean - -# ---------------------------------------------------------------- -# RootMeanSquaredError - -struct RootMeanSquaredError <: Aggregated end - -metadata_measure(RootMeanSquaredError; - instances = ["rms", "rmse", - "root_mean_squared_error"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = RootMeanSquare()) - -const RMS = RootMeanSquaredError -@create_aliases RootMeanSquaredError - -@create_docs(RootMeanSquaredError, -body= -""" -``\\text{root mean squared error} = \\sqrt{n^{-1}∑ᵢ|yᵢ-ŷᵢ|^2}`` or -``\\text{root mean squared error} = \\sqrt{\\frac{∑ᵢwᵢ|yᵢ-ŷᵢ|^2}{∑ᵢwᵢ}}`` -""", -scitype=DOC_INFINITE) - -call(::RootMeanSquaredError, ŷ, y) = (y .- ŷ).^2 |> skipinvalid |> mean |> sqrt -call(::RootMeanSquaredError, ŷ, y, w) = (y .- ŷ).^2 .* w |> skipinvalid |> mean |> sqrt - -# ------------------------------------------------------------------------- -# R-squared (coefficient of determination) - -struct RSquared <: Aggregated end - -metadata_measure(RSquared; - instances = ["rsq", "rsquared"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -const RSQ = RSquared -@create_aliases RSquared - -@create_docs(RSquared, -body= -""" -The R² (also known as R-squared or coefficient of determination) is suitable for -interpreting linear regression analysis (Chicco et al., [2021](https://doi.org/10.7717/peerj-cs.623)). 
- -Let ``\\overline{y}`` denote the mean of ``y``, then - -``\\text{R^2} = 1 - \\frac{∑ (\\hat{y} - y)^2}{∑ \\overline{y} - y)^2}.`` -""", -scitype=DOC_INFINITE) - -function call(::RSquared, ŷ, y) - num = (ŷ .- y).^2 |> skipinvalid |> sum - mean_y = mean(y) - denom = (mean_y .- y).^2 |> skipinvalid |> sum - return 1 - (num / denom) -end - -# ------------------------------------------------------------------- -# LP - -struct LPLoss{T<:Real} <: Unaggregated - p::T -end - -LPLoss(; p=2.0) = LPLoss(p) - -metadata_measure(LPLoss; - instances = ["l1", "l2"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const l1 = LPLoss(1) -const l2 = LPLoss(2) - -@create_docs(LPLoss, -body= -""" -Constructor signature: `LPLoss(p=2)`. Reports -`|ŷ[i] - y[i]|^p` for every index `i`. -""", -scitype=DOC_INFINITE) - -single(m::LPLoss, ŷ, y) = abs(y - ŷ)^(m.p) - -# ---------------------------------------------------------------------------- -# RootMeanSquaredLogError - -struct RootMeanSquaredLogError <: Aggregated end - -metadata_measure(RootMeanSquaredLogError; - instances = ["rmsl", "rmsle", "root_mean_squared_log_error"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = RootMeanSquare()) - -const RMSL = RootMeanSquaredLogError -@create_aliases RootMeanSquaredLogError - -@create_docs(RootMeanSquaredLogError, -body= -""" -``\\text{root mean squared log error} = -\\sqrt{n^{-1}∑ᵢ\\log\\left({yᵢ \\over ŷᵢ}\\right)^2}`` -""", -footer="See also [`rmslp1`](@ref).", -scitype=DOC_INFINITE) - -call(::RootMeanSquaredLogError, ŷ, y) = - (log.(y) - log.(ŷ)).^2 |> skipinvalid |> mean |> sqrt -call(::RootMeanSquaredLogError, ŷ, y, w) = - (log.(y) - log.(ŷ)).^2 .* w |> skipinvalid |> mean |> sqrt - -# --------------------------------------------------------------------------- -# RootMeanSquaredLogProportionalError - -struct RootMeanSquaredLogProportionalError{T<:Real} <: Aggregated - 
offset::T -end - -RootMeanSquaredLogProportionalError(; offset=1.0) = - RootMeanSquaredLogProportionalError(offset) - -metadata_measure(RootMeanSquaredLogProportionalError; - instances = ["rmslp1", ], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = RootMeanSquare()) - -const RMSLP = RootMeanSquaredLogProportionalError -@create_aliases RootMeanSquaredLogProportionalError - -@create_docs(RootMeanSquaredLogProportionalError, -body= -""" -Constructor signature: `RootMeanSquaredLogProportionalError(; offset = 1.0)`. - -``\\text{root mean squared log proportional error} = -\\sqrt{n^{-1}∑ᵢ\\log\\left({yᵢ + \\text{offset} \\over ŷᵢ + \\text{offset}}\\right)}`` -""", -footer="See also [`rmsl`](@ref). ", -scitype=DOC_INFINITE) - -call(m::RMSLP, ŷ, y) = - (log.(y .+ m.offset) - log.(ŷ .+ m.offset)).^2 |> - skipinvalid |> mean |> sqrt - -call(m::RMSLP, ŷ, y, w) = - (log.(y .+ m.offset) - log.(ŷ .+ m.offset)).^2 .* w |> - skipinvalid |> mean |> sqrt - -# -------------------------------------------------------------------------- -# RootMeanSquaredProportionalError - -struct RootMeanSquaredProportionalError{T<:Real} <: Aggregated - tol::T -end - -RootMeanSquaredProportionalError(; tol=eps()) = - RootMeanSquaredProportionalError(tol) - -metadata_measure(RootMeanSquaredProportionalError; - instances = ["rmsp", ], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = RootMeanSquare()) - -const RMSP = RootMeanSquaredProportionalError -@create_aliases RMSP - -@create_docs(RootMeanSquaredProportionalError, -body= -""" -Constructor keyword arguments: `tol` (default = `eps()`). - -``\\text{root mean squared proportional error} = -\\sqrt{m^{-1}∑ᵢ \\left({yᵢ-ŷᵢ \\over yᵢ}\\right)^2}`` - -where the sum is over indices such that `abs(yᵢ) > tol` and `m` is the number -of such indices. 
- -""", scitype=DOC_INFINITE) - -function call( - m::RootMeanSquaredProportionalError, - ŷ, - y, - w=nothing, - ) - ret = 0 - count = 0 - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - ayi = abs(y[i]) - if ayi > m.tol - dev = ((y[i] - ŷ[i]) / ayi)^2 - ret += dev - ret = _scale(ret, w, i) - count += 1 - end - end - return sqrt(ret / count) -end - -# ----------------------------------------------------------------------- -# MeanAbsoluteProportionalError - -struct MeanAbsoluteProportionalError{T} <: Aggregated - tol::T -end - -MeanAbsoluteProportionalError(; tol=eps()) = MeanAbsoluteProportionalError(tol) - -metadata_measure(MeanAbsoluteProportionalError; - instances = ["mape", ], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const MAPE = MeanAbsoluteProportionalError -@create_aliases MAPE - -@create_docs(MeanAbsoluteProportionalError, -body= -""" -Constructor key-word arguments: `tol` (default = `eps()`). - -``\\text{mean absolute proportional error} = m^{-1}∑ᵢ|{(yᵢ-ŷᵢ) \\over yᵢ}|`` - -where the sum is over indices such that `abs(yᵢ) > tol` and `m` is the number -of such indices. 
-""", scitype=DOC_INFINITE) - -function call( - m::MeanAbsoluteProportionalError, - ŷ, - y, - w=nothing, - ) - ret = 0 - count = 0 - @inbounds for i in eachindex(y) - (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue - ayi = abs(y[i]) - if ayi > m.tol - #if y[i] != zero(eltype(y)) - dev = abs((y[i] - ŷ[i]) / ayi) - ret += dev - ret =_scale(ret, w, i) - count += 1 - end - end - return ret / count -end - -# ------------------------------------------------------------------------- -# LogCoshLoss - -struct LogCoshLoss <: Unaggregated end - -metadata_measure(LogCoshLoss; - instances = ["log_cosh", "log_cosh_loss"], - target_scitype = InfiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const LogCosh = LogCoshLoss -@create_aliases LogCoshLoss - -@create_docs(LogCoshLoss, - body="Reports ``\\log(\\cosh(ŷᵢ-yᵢ))`` for each index `i`. ", - scitype=DOC_INFINITE) - -_softplus(x::T) where T<:Real = x > zero(T) ? x + log1p(exp(-x)) : log1p(exp(x)) -_log_cosh(x::T) where T<:Real = x + _softplus(-2x) - log(convert(T, 2)) - -single(::LogCoshLoss, ŷ, y) = _log_cosh(ŷ - y) diff --git a/src/measures/doc_strings.jl b/src/measures/doc_strings.jl deleted file mode 100644 index 03ed76df..00000000 --- a/src/measures/doc_strings.jl +++ /dev/null @@ -1,12 +0,0 @@ -# the following creates doc-strings for the aliases (`instances`) of each measure: - -for m in measures() - name = m.name - for instance in m.instances - alias = Symbol(instance) - quote - @doc "An instance of type [`$($name)`](@ref). "* - "Query the [`$($name)`](@ref) doc-string for details. 
" $alias - end |> eval - end -end diff --git a/src/measures/finite.jl b/src/measures/finite.jl deleted file mode 100644 index 908525ab..00000000 --- a/src/measures/finite.jl +++ /dev/null @@ -1,1247 +0,0 @@ -const FiniteArrMissing{N} = Union{ - AbstractArray{<:Union{Missing,Multiclass{N}}}, - AbstractArray{<:Union{Missing,OrderedFactor{N}}}} - -# --------------------------------------------------- -# misclassification rate - -struct MisclassificationRate <: Aggregated end - -metadata_measure(MisclassificationRate; - instances = ["misclassification_rate", "mcr"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss) - -const MCR = MisclassificationRate -@create_aliases MCR - -@create_docs(MisclassificationRate, -body= -""" -A confusion matrix can also be passed as argument. -$INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -# calling behaviour: -call(::MCR, ŷ, y) = (y .!= ŷ) |> Mean() -call(::MCR, ŷ, y, w) = (y .!= ŷ) .* w |> Mean() -(::MCR)(cm::ConfusionMatrixObject) = 1.0 - sum(diag(cm.mat)) / sum(cm.mat) - -# ------------------------------------------------------------- -# accuracy - -struct Accuracy <: Aggregated end - -metadata_measure(Accuracy; - instances = ["accuracy",], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score) - -@create_aliases Accuracy - -@create_docs(Accuracy, -body= -""" -Accuracy is proportion of correct predictions `ŷ[i]` that match the -ground truth `y[i]` observations. $INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -# calling behaviour: -call(::Accuracy, args...) = 1.0 - call(misclassification_rate, args...) 
-(::Accuracy)(m::ConfusionMatrixObject) = sum(diag(m.mat)) / sum(m.mat) - -# ----------------------------------------------------------- -# balanced accuracy - -struct BalancedAccuracy <: Aggregated - adjusted::Bool -end -BalancedAccuracy(; adjusted=false) = BalancedAccuracy(adjusted) - -metadata_measure(BalancedAccuracy; - instances = ["balanced_accuracy", "bacc", "bac"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score) - -const BACC = BalancedAccuracy -@create_aliases BACC - -@create_docs(BalancedAccuracy, -body= -""" -Balanced accuracy compensates standard [`Accuracy`](@ref) for class imbalance. -See [https://en.wikipedia.org/wiki/Precision_and_recall#Imbalanced_data](https://en.wikipedia.org/wiki/Precision_and_recall#Imbalanced_data). - -Setting `adjusted=true` rescales the score in the way prescribed in -[L. Mosley (2013): A balanced approach to the multi-class imbalance -problem. PhD thesis, Iowa State -University](https://lib.dr.iastate.edu/etd/13537/). In the binary -case, the adjusted balanced accuracy is also known as *Youden’s J -statistic*, or *informedness*. 
- -$INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -function call(m::BACC, ŷm, ym, wm=nothing) - - ŷ, y, w = _skipinvalid(ŷm, ym, wm) - - if w === nothing - n_given_class = StatsBase.countmap(y) - freq(i) = @inbounds n_given_class[y[i]] - ŵ = 1 ./ freq.(eachindex(y)) - else # following sklearn, which is non-linear - ŵ = similar(w) - @inbounds for i in eachindex(w) - ŵ[i] = w[i] / sum(w .* (y .== y[i])) - end - end - s = sum(ŵ) - score = sum((ŷ .== y) .* ŵ) / sum(ŵ) - if m.adjusted - n_classes = length(levels(y)) - chance = 1 / n_classes - score -= chance - score /= 1 - chance - end - return score -end - -# --------------------------------------------------- -# kappa - -struct Kappa <: Aggregated end - -metadata_measure(Kappa; - instances = ["kappa"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -@create_aliases Kappa - -@create_docs(Kappa, -body= -""" -A metric to measure agreement between predicted labels and the ground truth. 
-See [https://en.wikipedia.org/wiki/Cohen%27s_kappa](https://en.wikipedia.org/wiki/Cohen%27s_kappa) - -$INVARIANT_LABEL -""", -scitype=DOC_FINITE) - -# calling behaviour: -function (::Kappa)(cm::ConfusionMatrixObject{C}) where C - # relative observed agreement - same as accuracy - p₀ = sum(diag(cm.mat))/sum(cm.mat) - - # probability of agreement due to chance - for each class cᵢ, this - # would be: (#predicted=cᵢ)/(#instances) x (#observed=cᵢ)/(#instances) - rows_sum = sum!(similar(cm.mat, 1, C), cm.mat) # 1 x C matrix - cols_sum = sum!(similar(cm.mat, C, 1), cm.mat) # C X 1 matrix - pₑ = first(rows_sum*cols_sum)/sum(rows_sum)^2 - - # Kappa calculation - κ = (p₀ - pₑ)/(1 - pₑ) - - return κ -end - -call(k::Kappa, ŷ, y) = _confmat(ŷ, y, warn=false) |> k - - -# ================================================================== -## DETERMINISTIC BINARY PREDICTIONS - ORDER-INDEPENDENT - -# ------------------------------------------------------------------ -# Matthew's correlation - -struct MatthewsCorrelation <: Aggregated end - -metadata_measure(MatthewsCorrelation; - instances = ["matthews_correlation", "mcc"], - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) -const MCC = MatthewsCorrelation -@create_aliases MCC - -@create_docs(MatthewsCorrelation, -body= -""" -[https://en.wikipedia.org/wiki/Matthews_correlation_coefficient](https://en.wikipedia.org/wiki/Matthews_correlation_coefficient) -$INVARIANT_LABEL -""", -scitype=DOC_FINITE_BINARY) - -# calling behaviour: -function (::MCC)(cm::ConfusionMatrixObject{C}) where C - # http://rk.kvl.dk/introduction/index.html - # NOTE: this is O(C^3), there may be a clever way to - # speed this up though in general this is only used for low C - num = 0 - @inbounds for k in 1:C, l in 1:C, m in 1:C - num += cm[k,k] * cm[l,m] - cm[k,l] * cm[m,k] - end - den1 = 0 - den2 = 0 - @inbounds for k in 1:C - a = sum(cm[k, :]) - b = sum(cm[setdiff(1:C, k), :]) - den1 
+= a * b - a = sum(cm[:, k]) - b = sum(cm[:, setdiff(1:C, k)]) - den2 += a * b - end - mcc = num / sqrt(float(den1) * float(den2)) - - isnan(mcc) && return 0 - return mcc -end - -call(m::MCC, ŷ, y) = _confmat(ŷ, y, warn=false) |> m - - -# ========================================================================== -# DETERMINISTIC BINARY PREDICTIONS - ORDER DEPENDENT - -const CM2 = ConfusionMatrixObject{2} - -# -------------------------------------------------------------------------- -# FScore - -struct FScore{T<:Real} <: Aggregated - β::T - rev::Union{Nothing,Bool} -end - -FScore(; β=1.0, rev=nothing) = FScore(β, rev) - -metadata_measure(FScore; - human_name = "F-Score", - instances = ["f1score",], - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -@create_aliases FScore - -@create_docs(FScore, -body= -""" -This is the one-parameter generalization, ``F_β``, of the F-measure or -balanced F-score. - -[https://en.wikipedia.org/wiki/F1_score](https://en.wikipedia.org/wiki/F1_score) - -Constructor signature: `FScore(; β=1.0, rev=true)`. - -By default, the second element of `levels(y)` is designated as -`true`. To reverse roles, specify `rev=true`. -""", -scitype=DOC_ORDERED_FACTOR_BINARY, -footer="Constructor signature: `FScore(β=1.0, rev=false)`. 
") - -# calling on conf matrix: -function (score::FScore)(m::CM2) - β = score.β - β2 = β^2 - tp = _tp(m) - fn = _fn(m) - fp = _fp(m) - return (1 + β2) * tp / ((1 + β2)*tp + β2*fn + fp) -end - -# calling on arrays: -call(m::FScore, ŷ, y) = _confmat(ŷ, y; rev=m.rev) |> m - -# ------------------------------------------------------------------------- -# TruePositive and its cousins - struct and metadata declerations - -const TRUE_POSITIVE_AND_COUSINS = - (:TruePositive, :TrueNegative, :FalsePositive, :FalseNegative, - :TruePositiveRate, :TrueNegativeRate, :FalsePositiveRate, - :FalseNegativeRate, :FalseDiscoveryRate, :Precision, - :NegativePredictiveValue) - -for M in TRUE_POSITIVE_AND_COUSINS - ex = quote - struct $M <: Aggregated rev::Union{Nothing,Bool} end - $M(; rev=nothing) = $M(rev) - end - eval(ex) -end - -metadata_measure.((FalsePositive, FalseNegative); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :loss, - aggregation = Sum(), - supports_weights = false) - -metadata_measure.((FalsePositiveRate, FalseNegativeRate, FalseDiscoveryRate); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :loss, - supports_weights = false) - -metadata_measure.((TruePositive, TrueNegative); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - aggregation = Sum(), - supports_weights = false) - -metadata_measure.((TruePositiveRate, TrueNegativeRate, Precision, - NegativePredictiveValue); - target_scitype = FiniteArrMissing{2}, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false) - -# adjustments: -instances(::Type{<:TruePositive}) = ["true_positive", "truepositive"] -human_name(::Type{<:TruePositive}) = "number of true positives" - -instances(::Type{<:TrueNegative}) = ["true_negative", "truenegative"] -human_name(::Type{<:TrueNegative}) = "number of true negatives" - -instances(::Type{<:FalsePositive}) = 
["false_positive", "falsepositive"] -human_name(::Type{<:FalsePositive}) = "number of false positives" - -instances(::Type{<:FalseNegative}) = ["false_negative", "falsenegative"] -human_name(::Type{<:FalseNegative}) = "number of false negatives" - -instances(::Type{<:TruePositiveRate}) = - ["true_positive_rate", "truepositive_rate", - "tpr", "sensitivity", "recall", "hit_rate"] -human_name(::Type{<:TruePositiveRate}) = - "true positive rate (a.k.a recall)" - -instances(::Type{<:TrueNegativeRate}) = - ["true_negative_rate", "truenegative_rate", "tnr", - "specificity", "selectivity"] - -instances(::Type{<:FalsePositiveRate}) = - ["false_positive_rate", "falsepositive_rate", - "fpr", "fallout"] - "." -instances(::Type{<:FalseNegativeRate}) = - ["false_negative_rate", "falsenegative_rate", "fnr", "miss_rate"] - "." -instances(::Type{<:FalseDiscoveryRate}) = - ["false_discovery_rate", "falsediscovery_rate", "fdr"] - -instances(::Type{<:NegativePredictiveValue}) = - ["negative_predictive_value", "negativepredictive_value", "npv"] - -instances(::Type{<:Precision}) = - ["positive_predictive_value", "ppv", "positivepredictive_value", "precision"] -human_name(::Type{<:Precision}) = - "precision (a.k.a. positive predictive value)" - - -# --------------------------------------------------------------------- -# TruePositive and its cousins - doc-string building and alias creation - -for M in TRUE_POSITIVE_AND_COUSINS - eval(quote - $M == Precision || @create_aliases $M # precision handled separately - - @create_docs($M, - body= - """ - Assigns `false` to first element of `levels(y)`. To reverse roles, - use `$(name($M))(rev=true)`. 
- """, - scitype=DOC_ORDERED_FACTOR_BINARY) - end) -end - -# type aliases: -const TNR = TrueNegativeRate -const Specificity = TrueNegativeRate -const TPR = TruePositiveRate -const Recall = TPR -const FPR = FalsePositiveRate -const FNR = FalseNegativeRate -const FDR = FalseDiscoveryRate -const NPV = NegativePredictiveValue -const PPV = Precision - -# special case of precision; cannot generate alias's automatically due -# to conflict with Base.precision: -const positive_predictive_value = Precision() -const ppv = Precision() -const positivepredictive_value = Precision() - -# ---------------------------------------------------------------------- -# TruePositive and its cousins - helper functions for confusion matrices - -_tp(m::CM2) = m[2,2] -_tn(m::CM2) = m[1,1] -_fp(m::CM2) = m[2,1] -_fn(m::CM2) = m[1,2] - -_tpr(m::CM2) = _tp(m) / (_tp(m) + _fn(m)) -_tnr(m::CM2) = _tn(m) / (_tn(m) + _fp(m)) -_fpr(m::CM2) = 1 - _tnr(m) -_fnr(m::CM2) = 1 - _tpr(m) - -_fdr(m::CM2) = _fp(m) / (_tp(m) + _fp(m)) -_npv(m::CM2) = _tn(m) / (_tn(m) + _fn(m)) - -# ---------------------------------------------------------------------- -# TruePositive and its cousins - calling behaviour - -# NOTE: here we assume the CM was constructed a priori with the -# proper ordering so the field `rev` in the measure is ignored - -# on confusion matrices: -(::TruePositive)(m::CM2) = _tp(m) -(::TrueNegative)(m::CM2) = _tn(m) -(::FalsePositive)(m::CM2) = _fp(m) -(::FalseNegative)(m::CM2) = _fn(m) -(::TPR)(m::CM2) = _tpr(m) -(::TNR)(m::CM2) = _tnr(m) -(::FPR)(m::CM2) = _fpr(m) -(::FNR)(m::CM2) = _fnr(m) -(::FDR)(m::CM2) = _fdr(m) -(::NPV)(m::CM2) = _npv(m) -(::Precision)(m::CM2) = 1.0 - _fdr(m) - -# on arrays (ŷ, y): -for M_ex in TRUE_POSITIVE_AND_COUSINS - @eval call(m::$M_ex, ŷ, y) = _confmat(ŷ, y; rev=m.rev) |> m -end - -# since Base.precision exists (as single argument function) we -# manually overload Base.precision: -Base.precision(m::CM2) = m |> Precision() -function Base.precision(ŷ, y) - 
_check(Precision(), ŷ, y) - call(Precision(), ŷ, y) -end - - -# ================================================================= -# MULTICLASS AND ORDER INDEPENDENT - -const CM = ConfusionMatrixObject{N} where N - -abstract type MulticlassAvg end -struct MacroAvg <: MulticlassAvg end -struct MicroAvg <: MulticlassAvg end -struct NoAvg <: MulticlassAvg end - -const macro_avg = MacroAvg() -const micro_avg = MicroAvg() -const no_avg = NoAvg() - -const DS_AVG_RET = "Options for `average` are: `no_avg`, `macro_avg` "* - "(default) and `micro_avg`. Options for `return_type`, "* - "applying in the `no_avg` case, are: `LittleDict` (default) or "* - "`Vector`. " - -const DS_RET = "Options for `return_type` are: "* - "`LittleDict`(default) or "* - "`Vector`. " - -const CLASS_W = "An optional `AbstractDict`, denoted `class_w` above, "* - "keyed on `levels(y)`, specifies class weights. It applies if "* - "`average=macro_avg` or `average=no_avg`." - -""" - MulticlassFScore(; β=1.0, average=macro_avg, return_type=LittleDict) - -One-parameter generalization, ``F_β``, of the F-measure or balanced F-score for -multiclass observations. - - MulticlassFScore()(ŷ, y) - MulticlassFScore()(ŷ, y, class_w) - -Evaluate the default score on multiclass observations, `ŷ`, given -ground truth values, `y`. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFScore)`. 
- -""" -struct MulticlassFScore{T<:Real, - M<:MulticlassAvg, - U<:Union{Vector, LittleDict}} <:Aggregated - β::T - average::M - return_type::Type{U} -end - -MulticlassFScore(; β=1.0, average=macro_avg, return_type=LittleDict) = - MulticlassFScore(β, average, return_type) - -metadata_measure(MulticlassFScore; - instances = ["macro_f1score", "micro_f1score", - "multiclass_f1score"], - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - supports_weights = false, - supports_class_weights = true) - -MLJModelInterface.docstring(::Type{<:MulticlassFScore}) = - "Multiclass F_β score; aliases: " * - "`macro_f1score=MulticlassFScore()`, "* - "`multiclass_f1score=MulticlassFScore()` " * - "`micro_f1score=MulticlassFScore(average=micro_avg)`." - -const micro_f1score = MulticlassFScore(average=micro_avg) -const macro_f1score = MulticlassFScore(average=macro_avg) -const multiclass_f1score = MulticlassFScore(average=macro_avg) - -for M in (:MulticlassTruePositive, :MulticlassTrueNegative, - :MulticlassFalsePositive, :MulticlassFalseNegative) - ex = quote - struct $M{U<:Union{Vector, LittleDict}} <: Aggregated - return_type::Type{U} - end -# $M(return_type::Type{U}) where {U} = $M(return_type) - $M(; return_type=LittleDict) = $M(return_type) - end - eval(ex) -end - -const _mtp_vec = MulticlassTruePositive(return_type=Vector) -const _mfn_vec = MulticlassFalseNegative(return_type=Vector) -const _mfp_vec = MulticlassFalsePositive(return_type=Vector) -const _mtn_vec = MulticlassTrueNegative(return_type=Vector) - -for M in (:MulticlassTruePositiveRate, :MulticlassTrueNegativeRate, - :MulticlassFalsePositiveRate, :MulticlassFalseNegativeRate, - :MulticlassFalseDiscoveryRate, :MulticlassPrecision, - :MulticlassNegativePredictiveValue) - ex = quote - struct $M{T<:MulticlassAvg, U<:Union{Vector, LittleDict}} <: Aggregated - average::T - return_type::Type{U} - end - $M(; average=macro_avg, return_type=LittleDict) = $M(average, return_type) - end 
- eval(ex) -end - -metadata_measure.((MulticlassFalsePositive, MulticlassFalseNegative); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - aggregation = Sum(), - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = false) - -metadata_measure.((MulticlassFalsePositiveRate, MulticlassFalseNegativeRate, - MulticlassFalseDiscoveryRate); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :loss, - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = true) - -metadata_measure.((MulticlassTruePositive, MulticlassTrueNegative); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - aggregation = Sum(), - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = false) - -metadata_measure.((MulticlassTrueNegativeRate, MulticlassNegativePredictiveValue); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = true) - -metadata_measure.((MulticlassTruePositiveRate, MulticlassPrecision); - target_scitype = FiniteArrMissing, - prediction_type = :deterministic, - orientation = :score, - is_feature_dependent = false, - supports_weights = false, - supports_class_weights = true) - -MMI.docstring(::Type{<:MulticlassTruePositive}) = - "Number of true positives; " * - "aliases: `multiclass_true_positive`, `multiclass_truepositive`." -instances(::Type{<:MulticlassTruePositive}) = - ["multiclass_true_positive", "multiclass_truepositive"] -MMI.docstring(::Type{<:MulticlassTrueNegative}) = - "Number of true negatives; " * - "aliases: `multiclass_true_negative`, `multiclass_truenegative`." 
-instances(::Type{<:MulticlassTrueNegative}) = - ["multiclass_true_negative", "multiclass_truenegative"] -MMI.docstring(::Type{<:MulticlassFalsePositive}) = - "Number of false positives; " * - "aliases: `multiclass_false_positive`, `multiclass_falsepositive`." -instances(::Type{<:MulticlassFalsePositive}) = - ["multiclass_false_positive", "multiclass_falsepositive"] -MMI.docstring(::Type{<:MulticlassFalseNegative}) = - "Number of false negatives; " * - "aliases: `multiclass_false_negative`, `multiclass_falsenegative`." -instances(::Type{<:MulticlassFalseNegative}) = - ["multiclass_false_negative", "multiclass_falsenegative"] - -MMI.docstring(::Type{<:MulticlassTruePositiveRate}) = - "multiclass true positive rate; aliases: " * - "`multiclass_true_positive_rate`, `multiclass_tpr`, " * - "`multiclass_sensitivity`, `multiclass_recall`, " * - "`multiclass_hit_rate`, `multiclass_truepositive_rate`, " -instances(::Type{<:MulticlassTruePositiveRate}) = - ["multiclass_true_positive_rate", "multiclass_tpr", - "multiclass_sensitivity", "multiclass_recall", - "multiclass_hit_rate", "multiclass_truepositive_rate"] -MMI.docstring(::Type{<:MulticlassTrueNegativeRate}) = - "multiclass true negative rate; aliases: " * - "`multiclass_true_negative_rate`, `multiclass_tnr` " * - " `multiclass_specificity`, `multiclass_selectivity`, " * - "`multiclass_truenegative_rate`." -instances(::Type{<:MulticlassTrueNegativeRate}) = - ["multiclass_true_negative_rate", "multiclass_tnr", - "multiclass_specificity", "multiclass_selectivity", - "multiclass_truenegative_rate"] -MMI.docstring(::Type{<:MulticlassFalsePositiveRate}) = - "multiclass false positive rate; aliases: " * - "`multiclass_false_positive_rate`, `multiclass_fpr` " * - "`multiclass_fallout`, `multiclass_falsepositive_rate`." 
-instances(::Type{<:MulticlassFalsePositiveRate}) = - ["multiclass_false_positive_rate", "multiclass_fpr", - "multiclass_fallout", "multiclass_falsepositive_rate"] -MMI.docstring(::Type{<:MulticlassFalseNegativeRate}) = - "multiclass false negative rate; aliases: " * - "`multiclass_false_negative_rate`, `multiclass_fnr`, " * - "`multiclass_miss_rate`, `multiclass_falsenegative_rate`." -instances(::Type{<:MulticlassFalseNegativeRate}) = - ["multiclass_false_negative_rate", "multiclass_fnr", - "multiclass_miss_rate", "multiclass_falsenegative_rate"] -MMI.docstring(::Type{<:MulticlassFalseDiscoveryRate}) = - "multiclass false discovery rate; "* - "aliases: `multiclass_false_discovery_rate`, " * - "`multiclass_falsediscovery_rate`, `multiclass_fdr`." -instances(::Type{<:MulticlassFalseDiscoveryRate}) = - ["multiclass_falsediscovery_rate", "multiclass_fdr", - "multiclass_false_discovery_rate"] -MMI.docstring(::Type{<:MulticlassNegativePredictiveValue}) = - "multiclass negative predictive value; aliases: " * - "`multiclass_negative_predictive_value`, " * - "`multiclass_negativepredictive_value`, `multiclass_npv`." -instances(::Type{<:MulticlassNegativePredictiveValue}) = - ["multiclass_negative_predictive_value", - "multiclass_negativepredictive_value", "multiclass_npv"] -MMI.docstring(::Type{<:MulticlassPrecision}) = - "multiclass positive predictive value (aka precision);"* - " aliases: `multiclass_positive_predictive_value`, `multiclass_ppv`, " * - "`multiclass_positivepredictive_value`, " * - "`multiclass_precision`." -instances(::Type{<:MulticlassPrecision}) = - ["multiclass_positive_predictive_value", "multiclass_ppv", - "multiclass_positivepredictive_value", "multiclass_precision"] - -const W_KEY_MISMATCH = "Encountered target with levels different from the " * - "keys of user-specified dictionary of class weights." -const W_PROMOTE_WARN = "Using macro averaging instead of micro averaging, as "* - "class weights specified. 
" - - -# ---------------------------------------------------- -# MulticlassTruePositive - -""" - MulticlassTruePositive(; return_type=LittleDict) - -$(docstring(MulticlassTruePositive())) - - MulticlassTruePositive()(ŷ, y) - -Number of true positives for multiclass observations `ŷ` and ground -truth `y`, using default return type. $DS_RET - -For more information, run `info(MulticlassTruePositive)`. - -""" -function MulticlassTruePositive end -const multiclass_true_positive = MulticlassTruePositive() -const multiclass_truepositive = MulticlassTruePositive() -const mtp = MulticlassTruePositive() - - -# ---------------------------------------------------- -# MulticlassTrueNegative - -""" - MulticlassTrueNegative(; return_type=LittleDict) - -$(docstring(MulticlassTrueNegative())) - - MulticlassTrueNegative()(ŷ, y) - -Number of true negatives for multiclass observations `ŷ` and ground truth -`y`, using default return type. $DS_RET - -For more information, run `info(MulticlassTrueNegative)`. - -""" -function MulticlassTrueNegative end -const multiclass_true_negative = MulticlassTrueNegative() -const multiclass_truenegative = MulticlassTrueNegative() -const mtn = MulticlassTrueNegative() - - -# ---------------------------------------------------- -# MulticlassFalsePositive - -""" - MulticlassFalsePositive(; return_type=LittleDict) - -$(docstring(MulticlassFalsePositive())) - - MulticlassFalsePositive()(ŷ, y) - -Number of false positives for multiclass observations `ŷ` and ground -truth `y`, using default return type. $DS_RET - -For more information, run `info(MulticlassFalsePositive)`. 
- -""" -function MulticlassPositive end -const multiclass_false_positive = MulticlassFalsePositive() -const multiclass_falsepositive = MulticlassFalsePositive() -const mfp = MulticlassFalsePositive() - - -# ---------------------------------------------------- -# MulticlassFalseNegative - -""" - MulticlassFalseNegative(; return_type=LittleDict) - -$(docstring(MulticlassFalseNegative())) - - MulticlassFalseNegative()(ŷ, y) - -Number of false negatives for multiclass observations `ŷ` and ground -truth `y`, using default return type. $DS_RET - -For more information, run `info(MulticlassFalseNegative)`. - -""" -function MulticlassNegative end -const multiclass_false_negative = MulticlassFalseNegative() -const multiclass_falsenegative = MulticlassFalseNegative() -const mfn = MulticlassFalseNegative() - - -# ---------------------------------------------------- -# MulticlassTruePositiveRate - -""" - MulticlassTruePositiveRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassTruePositiveRate())) - - MulticlassTruePositiveRate(ŷ, y) - MulticlassTruePositiveRate(ŷ, y, class_w) - -True positive rate (a.k.a. sensitivity, recall, hit rate) for -multiclass observations `ŷ` and ground truth `y`, using default -averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassTruePositiveRate)`. 
- -""" -function MulticlassTruePositiveRate end -const multiclass_true_positive_rate = MulticlassTruePositiveRate() -const multiclass_truepositive_rate = MulticlassTruePositiveRate() -const multiclass_tpr = MulticlassTruePositiveRate() -const multiclass_sensitivity = MulticlassTruePositiveRate() -const multiclass_hit_rate = MulticlassTruePositiveRate() -const MTPR = MulticlassTruePositiveRate -const multiclass_recall = MulticlassTruePositiveRate() -const MulticlassRecall = MulticlassTruePositiveRate - - -# ---------------------------------------------------- -# MulticlassTrueNegativeRate - -""" - MulticlassTrueNegativeRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassTrueNegativeRate())) - - MulticlassTrueNegativeRate()(ŷ, y) - MulticlassTrueNegativeRate()(ŷ, y, class_w) - -True negative rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassTrueNegativeRate)`. - -""" -function MulticlassTrueNegativeRate end -const multiclass_true_negative_rate = MulticlassTrueNegativeRate() -const multiclass_truenegative_rate = MulticlassTrueNegativeRate() -const multiclass_tnr = MulticlassTrueNegativeRate() -const multiclass_specificity = MulticlassTrueNegativeRate() -const multiclass_selectivity = MulticlassTrueNegativeRate() -const MulticlassSpecificity = MulticlassTrueNegativeRate -const MTNR = MulticlassTrueNegativeRate - - -# ---------------------------------------------------- -# MulticlassFalsePositiveRate - -""" - MulticlassFalsePositiveRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassFalsePositiveRate())) - - MulticlassFalsePositiveRate()(ŷ, y) - MulticlassFalsePositiveRate()(ŷ, y, class_w) - -False positive rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFalsePositiveRate)`. 
- -""" -function MulticlassFalsePositiveRate end -const multiclass_false_positive_rate = MulticlassFalsePositiveRate() -const multiclass_falsepositive_rate = MulticlassFalsePositiveRate() -const multiclass_fpr = MulticlassFalsePositiveRate() -const MFPR = MulticlassFalsePositiveRate -const multiclass_fallout = MFPR() - - -# ---------------------------------------------------- -# MulticlassFalseNegativeRate - -""" - MulticlassFalseNegativeRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassFalseNegativeRate())) - - MulticlassFalseNegativeRate()(ŷ, y) - MulticlassFalseNegativeRate()(ŷ, y, class_w) - -False negative rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFalseNegativeRate)`. - -""" -function MulticlassFalseNegativeRate end -const multiclass_false_negative_rate = MulticlassFalseNegativeRate() -const multiclass_falsenegative_rate = MulticlassFalseNegativeRate() -const multiclass_fnr = MulticlassFalseNegativeRate() -const MFNR = MulticlassFalseNegativeRate -const multiclass_miss_rate = MFNR() - - -# ---------------------------------------------------- -# MulticlassFalseDiscoveryRate - -""" - MulticlassFalseDiscoveryRate(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassFalseDiscoveryRate())) - - MulticlassFalseDiscoveryRate()(ŷ, y) - MulticlassFalseDiscoveryRate()(ŷ, y, class_w) - -False discovery rate for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassFalseDiscoveryRate)`. 
- -""" -function MulticlassFalseDiscoveryRate end -const multiclass_false_discovery_rate = MulticlassFalseDiscoveryRate() -const multiclass_falsediscovery_rate = MulticlassFalseDiscoveryRate() -const multiclass_fdr = MulticlassFalseDiscoveryRate() -const MFDR = MulticlassFalseDiscoveryRate - - -# ---------------------------------------------------- -# MulticlassPrecision - -""" - MulticlassPrecision(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassPrecision())) - - MulticlassPrecision()(ŷ, y) - MulticlassPrecision()(ŷ, y, class_w) - -Precision for multiclass observations `ŷ` and ground truth `y`, using -default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassPrecision)`. - -""" -function MulticlassPrecision end -const multiclass_precision = MulticlassPrecision() -const multiclass_ppv = MulticlassPrecision() -const multiclass_positive_predictive_value = MulticlassPrecision() -const multiclass_positivepredictive_value = MulticlassPrecision() -const MPPV = MulticlassPrecision - - -# ---------------------------------------------------- -# MulticlassNegativePredictiveValue - -""" - MulticlassNegativePredictiveValue(; average=macro_avg, return_type=LittleDict) - -$(docstring(MulticlassNegativePredictiveValue())) - - MulticlassNegativePredictiveValue()(ŷ, y) - MulticlassNegativePredictiveValue()(ŷ, y, class_w) - -Negative predictive value for multiclass observations `ŷ` and ground truth -`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W - -For more information, run `info(MulticlassNegativePredictiveValue)`. 
- -""" -function MulticlassNegativePredictiveValue end -const multiclass_npv = MulticlassNegativePredictiveValue() -const multiclass_negative_predictive_value = MulticlassNegativePredictiveValue() -const multiclass_negativepredictive_value = MulticlassNegativePredictiveValue() -const MNPV = MulticlassNegativePredictiveValue - - -# ----------------------------------------------------- -## INTERNAL FUNCTIONS ON MULTICLASS CONFUSION MATRIX - -_mtp(m::CM, return_type::Type{Vector}) = diag(m.mat) -_mtp(m::CM, return_type::Type{LittleDict}) = - LittleDict(m.labels, diag(m.mat)) - -_mfp(m::CM, return_type::Type{Vector}) = - (col_sum = vec(sum(m.mat, dims=2)); col_sum .-= diag(m.mat)) - -_mfp(m::CM, return_type::Type{LittleDict}) = - (col_sum = vec(sum(m.mat, dims=2)); col_sum .-= diag(m.mat); - LittleDict(m.labels, col_sum)) - -_mfn(m::CM, return_type::Type{Vector}) = - (row_sum = vec(collect(transpose(sum(m.mat, dims=1)))); - row_sum .-= diag(m.mat)) - -_mfn(m::CM, return_type::Type{LittleDict}) = - (row_sum = vec(collect(transpose(sum(m.mat, dims=1)))); - row_sum .-= diag(m.mat); LittleDict(m.labels, row_sum)) - -function _mtn(m::CM, return_type::Type{Vector}) - _sum = sum(m.mat, dims=2) - _sum .= sum(m.mat) .- (_sum .+= sum(m.mat, dims=1)'.- diag(m.mat)) - return vec(_sum) -end - -function _mtn(m::CM, return_type::Type{LittleDict}) - _sum = sum(m.mat, dims=2) - _sum .= sum(m.mat) .- (_sum .+= sum(m.mat, dims=1)'.- diag(m.mat)) - return LittleDict(m.labels, vec(_sum)) -end - -@inline _mean(x::Arr{<:Real}) = mean(skipnan(x)) # defined in src/data/data.jl - -@inline function _class_w(level_m::Arr{<:String}, - class_w::AbstractDict{<:Any, <:Real}) - class_w_labels = levels(keys(class_w)) - string.(class_w_labels) == level_m || throw(ArgumentError(W_KEY_MISMATCH)) - return [class_w[l] for l in class_w_labels] -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::NoAvg, return_type::Type{Vector}) - return vec(a ./ (a + b)) -end - -@inline 
function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::NoAvg, return_type::Type{LittleDict}) - return LittleDict(m.labels, _mc_helper(m, a, b, average, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::MacroAvg, return_type) - return _mean(_mc_helper(m, a, b, no_avg, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - average::MicroAvg, return_type) - a_sum, b_sum = sum(a), sum(b) - return a_sum / (a_sum + b_sum) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{Vector}) - level_w = _class_w(m.labels, class_w) - return _mc_helper(m, a, b, no_avg, return_type) .* level_w -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MacroAvg, return_type::Type{Vector}) - return _mean(_mc_helper(m, a, b, class_w, no_avg, return_type)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MicroAvg, return_type) - @warn W_PROMOTE_WARN - return _mc_helper(m, a, b, class_w, macro_avg, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{Vector}) - level_w = _class_w(m.labels, class_w) - return (1 .- helper_name(m, no_avg, return_type)) .* level_w -end - -@inline function _mc_helper_b(m::CM, helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{LittleDict}) - level_w = _class_w(m.labels, class_w) - return LittleDict(m.labels, ((1 .- helper_name(m, no_avg, Vector)) .* level_w)) -end - -@inline function _mc_helper_b(m::CM, helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::MacroAvg, return_type) - return _mean(_mc_helper_b(m, helper_name, class_w, no_avg, Vector)) -end - -@inline function _mc_helper_b(m::CM, 
helper_name, - class_w::AbstractDict{<:Any, <:Real}, - average::MicroAvg, return_type) - @warn W_PROMOTE_WARN - return _mc_helper_b(m, helper_name, class_w, macro_avg, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::NoAvg, - return_type::Type{LittleDict}) - return LittleDict(m.labels, 1.0 .- helper_name(m, average, Vector)) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::NoAvg, - return_type::Type{Vector}) - return 1.0 .- helper_name(m, average, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::MacroAvg, - return_type) - return 1.0 .- helper_name(m, average, Vector) -end - -@inline function _mc_helper_b(m::CM, helper_name, average::MicroAvg, - return_type) - return 1.0 .- helper_name(m, average, Vector) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::NoAvg, return_type::Type{LittleDict}) - level_w = _class_w(m.labels, class_w) - return LittleDict(m.labels, _mc_helper(m, a, b, class_w, no_avg, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MacroAvg, return_type::Type{U}) where U - return _mean(_mc_helper(m, a, b, class_w, no_avg, Vector)) -end - -@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real}, - class_w::AbstractDict{<:Any, <:Real}, - average::MicroAvg, return_type::Type{U}) where U - @warn W_PROMOTE_WARN - return _mc_helper(m, a, b, class_w, macro_avg, return_type) -end - -function _mtpr(m::CM, average::A, return_type::Type{U}) where {A, U} - mtp_val, mfn_val = _mtp_vec(m), _mfn_vec(m) - return _mc_helper(m, mtp_val, mfn_val, average, return_type) -end - -function _mtpr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mtp_val, mfn_val = _mtp_vec(m), _mfn_vec(m) - return _mc_helper(m, mtp_val, mfn_val, class_w, average, return_type) -end - -function _mtnr(m::CM, 
average::A, return_type::Type{U}) where {A, U} - mtn_val, mfp_val = _mtn_vec(m), _mfp_vec(m) - return _mc_helper(m, mtn_val, mfp_val, average, return_type) -end - -function _mtnr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mtn_val, mfp_val = _mtn_vec(m), _mfp_vec(m) - return _mc_helper(m, mtn_val, mfp_val, class_w, average, return_type) -end - -_mfpr(m::CM, average::A, return_type::Type{U}) where {A, U} = - _mc_helper_b(m, _mtnr, average, return_type) - -function _mfpr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - return _mc_helper_b(m, _mtnr, class_w, average, return_type) -end - -_mfnr(m::CM, average::A, return_type::Type{U}) where {A, U} = - _mc_helper_b(m, _mtpr, average, return_type) - -function _mfnr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - return _mc_helper_b(m, _mtpr, class_w, average, return_type) -end - -function _mfdr(m::CM, average::A, return_type::Type{U}) where {A, U} - mfp_val, mtp_val = _mfp_vec(m), _mtp_vec(m) - return _mc_helper(m, mfp_val, mtp_val, average, return_type) -end - -function _mfdr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mfp_val, mtp_val = _mfp_vec(m), _mtp_vec(m) - return _mc_helper(m, mfp_val, mtp_val, class_w, average, return_type) -end - -function _mnpv(m::CM, average::A, return_type::Type{U}) where {A, U} - mtn_val, mfn_val = _mtn_vec(m), _mfn_vec(m) - return _mc_helper(m, mtn_val, mfn_val, average, return_type) -end - -function _mnpv(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A, - return_type::Type{U}) where {A, U} - mtn_val, mfn_val = _mtn_vec(m), _mfn_vec(m) - return _mc_helper(m, mtn_val, mfn_val, class_w, average, return_type) -end - -## CALLABLES ON MULTICLASS CONFUSION MATRIX - -(p::MulticlassTruePositive)(m::CM) = _mtp(m, p.return_type) -(n::MulticlassTrueNegative)(m::CM) = _mtn(m, n.return_type) 
-(p::MulticlassFalsePositive)(m::CM) = _mfp(m, p.return_type) -(n::MulticlassFalseNegative)(m::CM) = _mfn(m, n.return_type) - -(r::MTPR)(m::CM) = _mtpr(m, r.average, r.return_type) -(r::MTPR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mtpr(m, w, r.average, r.return_type) - -(r::MTNR)(m::CM) = _mtnr(m, r.average, r.return_type) -(r::MTNR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mtnr(m, w, r.average, r.return_type) - -(r::MFPR)(m::CM) = _mfpr(m, r.average, r.return_type) -(r::MFPR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mfpr(m, w, r.average, r.return_type) - -(r::MFNR)(m::CM) = _mfnr(m, r.average, r.return_type) -(r::MFNR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mfnr(m, w, r.average, r.return_type) - -(r::MFDR)(m::CM) = _mfdr(m, r.average, r.return_type) -(r::MFDR)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mfdr(m, w, r.average, r.return_type) - -(v::MNPV)(m::CM) = _mnpv(m, v.average, v.return_type) -(v::MNPV)(m::CM, w::AbstractDict{<:Any, <:Real}) = - _mnpv(m, w, v.average, v.return_type) - -(p::MulticlassPrecision)(m::CM) = - _mc_helper_b(m, _mfdr, p.average, p.return_type) -(p::MulticlassPrecision)(m::CM, class_w::AbstractDict{<:Any, <:Real}) = - _mc_helper_b(m, _mfdr, class_w, p.average, p.return_type) - -@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real}, - average::NoAvg, return_type::Type{LittleDict}) - β2 = β^2 - return LittleDict(m.labels, (1 + β2) * mtp_val ./ ((1 + β2) * mtp_val + β2 * mfn_val + mfp_val)) -end - -@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real}, - average::NoAvg, return_type::Type{Vector}) - β2 = β^2 - return (1 + β2) * mtp_val ./ ((1 + β2) * mtp_val + β2 * mfn_val + mfp_val) -end - -@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real}, - average::MacroAvg, return_type::Type{U}) where U - return _mean(_fs_helper(m, β, mtp_val, mfp_val, mfn_val, no_avg, 
Vector)) -end - -function (f::MulticlassFScore)(m::CM) - f.average == micro_avg && return MulticlassRecall(; average=micro_avg, return_type=f.return_type)(m) - mtp_val = _mtp(m, Vector) - mfp_val = _mfp(m, Vector) - mfn_val = _mfn(m, Vector) - return _fs_helper(m, f.β, mtp_val, mfp_val, mfn_val, f.average, f.return_type) -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::NoAvg, return_type::Type{LittleDict}) - level_w = _class_w(m.labels, w) - return LittleDict(m.labels, - MulticlassFScore(β=β, - average=no_avg, - return_type=Vector)(m) .* level_w) -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::NoAvg, return_type::Type{Vector}) - level_w = _class_w(m.labels, w) - return MulticlassFScore(β=β, - average=no_avg, - return_type=Vector)(m) .* level_w -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::MacroAvg, return_type::Type{U}) where U - return _mean(_fs_helper(m, w, β, no_avg, Vector)) -end - -@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real, - average::MicroAvg, return_type::Type{U}) where U - @warn W_PROMOTE_WARN - return _fs_helper(m, w, β, macro_avg, return_type) -end - -function (f::MulticlassFScore)(m::CM, class_w::AbstractDict{<:Any, <:Real}) - return _fs_helper(m, class_w, f.β, f.average, f.return_type) -end - -## Callables on arrays - -for M_ex in (:MulticlassTruePositive, :MulticlassTrueNegative, - :MulticlassFalsePositive, :MulticlassFalseNegative) - @eval call(m::$M_ex, ŷ, y) = m(_confmat(ŷ, y, warn=false)) -end - -for M_ex in (:MTPR, :MTNR, :MFPR, :MFNR, :MFDR, :MulticlassPrecision, :MNPV, - :MulticlassFScore) - @eval call(m::$M_ex, ŷ, y) = m(_confmat(ŷ, y, warn=false)) - @eval call(m::$M_ex, ŷ, y, class_w::AbstractDict{<:Any, <:Real}) = - m(_confmat(ŷ, y, warn=false), class_w) -end diff --git a/src/measures/loss_functions_interface.jl b/src/measures/loss_functions_interface.jl deleted file 
mode 100644 index 5d7d6125..00000000 --- a/src/measures/loss_functions_interface.jl +++ /dev/null @@ -1,208 +0,0 @@ -# implementation of MLJ measure interface for LossFunctions.jl - -function naked(T::Type) - without_module_name = split(string(T), '.') |> last - without_type_parameters = split(without_module_name, '{') |> first - return Symbol(without_type_parameters) -end - -const WITHOUT_PARAMETERS = - setdiff(LOSS_FUNCTIONS, WITH_PARAMETERS) - -## WRAPPER - -abstract type SupervisedLoss <: Unaggregated end - - -struct MarginLoss{L<:LossFunctions.MarginLoss} <: SupervisedLoss - loss::L -end - -struct DistanceLoss{L<:LossFunctions.DistanceLoss} <: SupervisedLoss - loss::L -end - -# INTERFACE FOR EXTRACTING PARAMETERS - -# LossFunctions.jl does not have a uniform interface for extacting -# parameters, and hence: - -_parameter(loss::LossFunctions.DWDMarginLoss) = loss.q -_parameter(loss::LossFunctions.SmoothedL1HingeLoss) = loss.gamma -_parameter(loss::LossFunctions.HuberLoss) = loss.d -_parameter(loss::LossFunctions.L1EpsilonInsLoss) = loss.ε -_parameter(loss::LossFunctions.L2EpsilonInsLoss) = loss.ε -_parameter(::LossFunctions.LPDistLoss{P}) where P = P -_parameter(::LossFunctions.L1DistLoss) = 1 -_parameter(::LossFunctions.L2DistLoss) = 2 -_parameter(loss::LossFunctions.QuantileLoss) = loss.τ - - -## CONSTRUCTORS AND CALLING BEHAVIOUR - -err_wrap(n) = ArgumentError("Bad @wrap syntax: $n. 
") - -# We define amacro to wrap a concrete `LossFunctions.SupervisedLoss` -# type and define its constructor, and to define property access in -# case of parameters; the macro also defines calling behaviour: -macro wrap_loss(ex) - ex.head == :call || throw(err_wrap(1)) - Loss_ex = ex.args[1] - Loss_str = string(Loss_ex) - if Loss_ex in MARGIN_LOSSES - T = :MarginLoss - else - T = :DistanceLoss - end - - # bind name to wrapped version of LossFunctions loss: - program = quote - const $Loss_ex = $T{<:LossFunctions.$Loss_ex} - name(M::Type{<:$Loss_ex}) = $Loss_str - end - - # defined instances - alias = snakecase(string(Loss_ex)) - push!(program.args, quote - instances(::Type{<:$Loss_ex}) = [$alias, ] - end) - - # define kw constructor and expose any parameter as a property: - if length(ex.args) == 1 - push!(program.args, quote - $Loss_ex() = $T(LossFunctions.$Loss_ex()) - Base.propertynames(::$Loss_ex) = () - end) - elseif length(ex.args) > 1 - sub_ex = ex.args[2] - sub_ex.head == :parameters || throw(err_wrap(2)) - length(sub_ex.args) == 1 || throw(err_wrap("Only 1 kwarg supported")) - sub_ex.args[1].head == :kw || throw(err_wrap(3)) - var_ex = sub_ex.args[1].args[1] - var_str = string(var_ex) - val_ex = sub_ex.args[1].args[2] - push!(program.args, quote - $Loss_ex(; $var_ex=$val_ex) = - $T(LossFunctions.$Loss_ex($var_ex)) - $Loss_ex(p) = $Loss_ex($var_ex=p) - Base.propertynames(::$Loss_ex) = (Symbol($var_str), ) - function Base.getproperty(wrapper::$Loss_ex, name::Symbol) - if name === Symbol($var_str) - return _parameter(getfield(wrapper, :loss)) # see below - end - error("type $($Loss_ex) has no property $name") - end - end) - else - throw(err_wrap(4)) - end - - esc(program) -end - -for Loss in WITHOUT_PARAMETERS - eval(:(@wrap_loss $Loss())) -end - -@wrap_loss DWDMarginLoss(; q=1.0) -@wrap_loss SmoothedL1HingeLoss(; gamma=1.0) -@wrap_loss HuberLoss(; d=1.0) -@wrap_loss L1EpsilonInsLoss(; ε=1.0) -@wrap_loss L2EpsilonInsLoss(; ε=1.0) -@wrap_loss LPDistLoss(; P=2) 
-@wrap_loss QuantileLoss(; τ=0.7) - - -## GENERIC TRAITS - -const LossFunctions = LossFunctions -is_measure_type(::Type{<:SupervisedLoss}) = true -orientation(::Type{<:SupervisedLoss}) = :loss -reports_each_observation(::Type{<:SupervisedLoss}) = true -is_feature_dependent(::Type{<:SupervisedLoss}) = false -supports_weights(::Type{<:SupervisedLoss}) = true -docstring(M::Type{<:SupervisedLoss}) = name(M) - - -## CALLING - DISTANCE BASED LOSS FUNCTIONS - -MMI.prediction_type(::Type{<:DistanceLoss}) = :deterministic -MMI.target_scitype(::Type{<:DistanceLoss}) = Union{Vec{Continuous},Vec{Count}} - -call(measure::DistanceLoss, yhat, y) = - (getfield(measure, :loss)).(yhat, y) - -function call(measure::DistanceLoss, yhat, y, w::AbstractArray) - return w .* call(measure, yhat, y) -end - - -## CALLING - MARGIN BASED LOSS FUNCTIONS - -MMI.prediction_type(::Type{<:MarginLoss}) = :probabilistic -MMI.target_scitype(::Type{<:MarginLoss}) = AbstractArray{<:Finite{2}} - -# rescale [0, 1] -> [-1, 1]: -_scale(p) = 2p - 1 - -function call(measure::MarginLoss, yhat, y) - probs_of_observed = broadcast(pdf, yhat, y) - loss = getfield(measure, :loss) - return loss.(_scale.(probs_of_observed), 1) -end - -call(measure::MarginLoss, yhat, y, w::AbstractArray) = - w .* call(measure, yhat, y) - - -## ADJUSTMENTS - -human_name(::Type{<:L1EpsilonInsLoss}) = "l1 ϵ-insensitive loss" -human_name(::Type{<:L2EpsilonInsLoss}) = "l2 ϵ-insensitive loss" -human_name(::Type{<:DWDMarginLoss}) = "distance weighted discrimination loss" - -_signature(::Any) = "" -_signature(::Type{<:HuberLoss}) = "`HuberLoss(; d=1.0)`" -_signature(::Type{<:DWDMarginLoss}) = "`DWDMarginLoss(; q=1.0)`" -_signature(::Type{<:SmoothedL1HingeLoss}) = "`SmoothedL1HingeLoss(; gamma=1.0)`" -_signature(::Type{<:L1EpsilonInsLoss}) = "`L1EpsilonInsLoss(; ε=1.0)`" -_signature(::Type{<:L2EpsilonInsLoss}) = "`L2EpsilonInsLoss(; ε=1.0)`" -_signature(::Type{<:LPDistLoss}) = "`LPDistLoss(; P=2)`" -_signature(::Type{<:QuantileLoss}) = 
"`QuantileLoss(; τ=0.7)`" - - -## ALIASES AND DOCSTRINGS - -const DOC_LOSS_FUNCTIONS = -""" -For more detail, see the original LossFunctions.jl documentation *but -note differences in the signature.* - -Losses from LossFunctions.jl do not support `missing` values. To use -with `missing` values, replace `(ŷ, y)` with `skipinvalid(ŷ, y))`. -""" - -for Loss_ex in DISTANCE_LOSSES - eval(quote - sig = _signature($Loss_ex) - isempty(sig) || (sig = "Constructor signature: "*sig) - @create_aliases $Loss_ex - @create_docs($Loss_ex, - typename = name($Loss_ex), - body=DOC_LOSS_FUNCTIONS, - footer=sig) - end) -end - -for Loss_ex in MARGIN_LOSSES - eval(quote - sig = _signature($Loss_ex) - isempty(sig) || (sig = "Constructor signature: "*sig) - @create_aliases $Loss_ex - @create_docs($Loss_ex, - typename = name($Loss_ex), - body=DOC_LOSS_FUNCTIONS, - scitype=DOC_FINITE_BINARY, - footer= sig) - end) -end diff --git a/src/measures/measure_search.jl b/src/measures/measure_search.jl deleted file mode 100644 index bd813009..00000000 --- a/src/measures/measure_search.jl +++ /dev/null @@ -1,65 +0,0 @@ -const LOCAL_MEASURE_TYPES = filter(x->x != SupervisedLoss, - vcat(subtypes(MLJBase.Unaggregated), - subtypes(MLJBase.Aggregated))) - -const LOSS_FUNCTIONS_MEASURE_TYPES = - [eval(:($Loss)) for Loss in LOSS_FUNCTIONS] - -const MEASURE_TYPES = vcat(LOCAL_MEASURE_TYPES, LOSS_FUNCTIONS_MEASURE_TYPES) - -const MeasureProxy = NamedTuple{Tuple(MEASURE_TRAITS)} - -function Base.show(stream::IO, p::MeasureProxy) - instances = "["*join(p.instances, ", ")*"]" - print(stream, "(name = $(p.name), instances = $instances, ...)") -end - -function Base.show(stream::IO, ::MIME"text/plain", p::MeasureProxy) - printstyled(IOContext(stream, :color=> MLJBase.SHOW_COLOR[]), - p.docstring, bold=false, color=:magenta) - println(stream) - MLJBase.fancy_nt(stream, p) -end - -""" - measures() - -List all measures as named-tuples keyed on measure traits. - - measures(filters...) 
- -List all measures compatible with the target `y`. - - measures(needle::Union{AbstractString,Regex} - -List all measures with `needle` in a measure's `name`, `instances`, or -`docstring` - - -### Example - -Find all classification measures supporting sample weights: - - measures(m -> m.target_scitype <: AbstractVector{<:Finite} && - m.supports_weights) - -Find all measures in the "rms" family: - - measures("rms") - -""" -function measures(conditions...) - all_measures = map(info, MEASURE_TYPES) - return filter(all_measures) do measure - all(c(measure) for c in conditions) - end -end - -function measures(needle::Union{AbstractString,Regex}) - f = m -> occursin(needle, m.name) || - occursin(needle, m.docstring) || - occursin(needle, join(m.instances, " ")) - return MLJBase.measures(f) -end - -measures() = measures(x->true) diff --git a/src/measures/measures.jl b/src/measures/measures.jl deleted file mode 100644 index 3c23a4f9..00000000 --- a/src/measures/measures.jl +++ /dev/null @@ -1,302 +0,0 @@ -const PROPER_SCORING_RULES = "[Gneiting and Raftery (2007), \"Strictly"* - "Proper Scoring Rules, Prediction, and Estimation\""* - "](https://doi.org/10.1198/016214506000001437)" -const DOC_FINITE = - "`AbstractArray{<:Union{Finite,Missing}` (multiclass classification)" -const DOC_FINITE_BINARY = - "`AbstractArray{<:Union{Finite{2},Missing}}` (binary classification)" -const DOC_ORDERED_FACTOR = - "`AbstractArray{<:Union{OrderedFactor,Missing}}` (classification of ordered target)" -const DOC_ORDERED_FACTOR_BINARY = - "`AbstractArray{<:Union{OrderedFactor{2},Missing}}` "* - "(binary classification where choice of \"true\" effects the measure)" -const DOC_CONTINUOUS = "`AbstractArray{<:Union{Continuous,Missing}}` (regression)" -const DOC_COUNT = "`AbstractArray{<:Union{Count,Missing}}`" -const DOC_MULTI = "`AbtractArray{<:Union{Missing,T}` where `T` is `Continuous` "* - "or `Count` (for respectively continuous or discrete Distribution.jl objects in "* - "`ŷ`) or 
`OrderedFactor` or `Multiclass` "* - "(for `UnivariateFinite` distributions in `ŷ`)" - -const DOC_INFINITE = "`AbstractArray{<:Union{Infinite,Missing}}`" -const INVARIANT_LABEL = - "This metric is invariant to class reordering." -const VARIANT_LABEL = - "This metric is *not* invariant to class re-ordering" - -is_measure_type(::Any) = false - -# Each of the following traits, with fallbacks defined in -# StatisticalTraits.jl, make sense for some or all measures: - -const MEASURE_TRAITS = [ - :name, - :instances, - :human_name, - :target_scitype, - :supports_weights, - :supports_class_weights, - :prediction_type, - :orientation, - :reports_each_observation, - :aggregation, - :is_feature_dependent, - :docstring, - :distribution_type -] - -# # FOR BUILT-IN MEASURES (subtyping Measure) - -abstract type Measure <: MLJType end -abstract type Aggregated <: Measure end -abstract type Unaggregated <: Measure end - -StatisticalTraits.reports_each_observation(::Type{<:Aggregated}) = false -StatisticalTraits.reports_each_observation(::Type{<:Unaggregated}) = true - - -# # FALLBACK CHECKS -extra_check(::Measure, args...) 
= nothing -function _check(measure::Measure, yhat, y) - check_dimensions(yhat, y) - extra_check(measure, yhat, y) -end -function _check(measure::Measure, yhat, y, w) - check_dimensions(yhat, y) - extra_check(measure, yhat, y, w) -end -function _check(measure::Measure, yhat, y, w::Arr) - check_dimensions(yhat, y) - check_dimensions(y, w) - extra_check(measure, yhat, y, w) -end -function _check(measure::Measure, yhat::Arr{<:UnivariateFinite}) - check_dimensions(yhat, y) - check_pools(yhat, y) - extra_check(measure, yhat, y) -end - -function _check( - measure::Measure, - yhat::Arr{<:UnivariateFinite}, - y, - w::Arr -) - check_dimensions(yhat, y) - check_pools(yhat, y) - extra_check(measure, yhat, y, w) -end - -function _check( - measure::Measure, - yhat::Arr{<:UnivariateFinite}, - y, - w::AbstractDict -) - check_dimensions(yhat, y) - check_pools(yhat, y) - check_pools(yhat, w) - extra_check(measure, yhat, y, w) -end - -# # METHODS TO EVALUATE MEASURES - -# See measures/README.md for details - -# `robust_single` can accept `missing` observations/predictions but is never overloaded; -# `single` is overloaded but does not need to handle missings. This factoring allows us -# to avoid method ambiguities which are cumbersome to avoid with only one function. - -robust_single(args...) = single(args...) 
-robust_single(m, ::Missing, ::Missing) = missing -robust_single(m, ::Missing, η) = missing -robust_single(m, η̂, ::Missing) = missing - -const Label = Union{CategoricalValue, Number, AbstractString, Symbol, AbstractChar} - -# closure for broadcasting: -robust_single(measure::Measure) = (ηhat, η) -> robust_single(measure, ηhat, η) - -call(measure::Unaggregated, yhat, y) = broadcast(robust_single(measure), yhat, y) -function call(measure::Unaggregated, yhat, y, w::AbstractArray) - unweighted = broadcast(robust_single(measure), yhat, y) - return w .* unweighted -end -function call(measure::Unaggregated, yhat, y, weight_given_class::AbstractDict) - unweighted = broadcast(robust_single(measure), yhat, y) - w = @inbounds broadcast(η -> weight_given_class[η], y) - return w .* unweighted -end - -# ## Top level -function (measure::Measure)(args...) - _check(measure, args...) - call(measure, args...) -end - -# # TRAITS - -# user-bespoke measures will subtype `Measure` directly and the -# following will therefore not apply: -StatisticalTraits.supports_weights(::Type{<:Union{Aggregated, Unaggregated}}) = true - -is_measure_type(::Type{<:Measure}) = true -is_measure(m) = is_measure_type(typeof(m)) - -# docstring fall-back: -_decorate(s::AbstractString) = "`$s`" -_decorate(v::Vector{<:AbstractString}) = join(_decorate.(v), ", ") -function MMI.docstring(M::Type{<:Measure}) - list = _decorate(instances(M)) - ret = "`$(name(M))` - $(human_name(M)) type" - isempty(list) || (ret *= " with instances $list") - ret *= ". 
" - return ret -end - -# display: -show_as_constructed(::Type{<:Measure}) = true - -# info -function StatisticalTraits.info(M::Type{<:Measure}) - values = Tuple(@eval($trait($M)) for trait in MEASURE_TRAITS) - return NamedTuple{Tuple(MEASURE_TRAITS)}(values) -end - -StatisticalTraits.info(m::Measure) = StatisticalTraits.info(typeof(m)) - - -# # AGGREGATION - -(::Sum)(v) = sum(skipinvalid(v)) -(::Sum)(v::LittleDict) = sum(values(v)) - -(::Mean)(v) = mean(skipinvalid(v)) -(::Mean)(v::LittleDict) = mean(values(v)) - -(::RootMeanSquare)(v) = sqrt(mean(skipinvalid(v).^2)) - -aggregate(v, measure) = aggregation(measure)(v) - -# aggregation is no-op on scalars: -const MeasureValue = Union{Real,Tuple{<:Real,<:Real}} # number or interval -aggregate(x::MeasureValue, measure) = x - - -# # UNIVERSAL CALLING SYNTAX - -# yhat - predictions (point or probabilisitic) -# X - features -# y - target observations -# w - per-observation weights - -function value(measure, yhat, X, y, w) - vfdep = Val(is_feature_dependent(measure)) - vsweights = Val(supports_weights(measure) || - supports_class_weights(measure)) - return value(measure, yhat, X, y, w, vfdep, vsweights) -end - -# # UNIVERSAL CALLING INTERFACE - -# is feature independent, weights not supported: -value(m, yhat, X, y, w, ::Val{false}, ::Val{false}) = m(yhat, y) - -# is feature dependent:, weights not supported: -value(m, yhat, X, y, w, ::Val{true}, ::Val{false}) = m(yhat, X, y) - -# is feature independent, weights supported: -value(m, yhat, X, y, w, ::Val{false}, ::Val{true}) = m(yhat, y, w) -value(m, yhat, X, y, ::Nothing, ::Val{false}, ::Val{true}) = m(yhat, y) - -# is feature dependent, weights supported: -value(m, yhat, X, y, w, ::Val{true}, ::Val{true}) = m(yhat, X, y, w) -value(m, yhat, X, y, ::Nothing, ::Val{true}, ::Val{true}) = m(yhat, X, y) - -# # helpers - -_scale(x, w::Arr, i) = x*w[i] -_scale(x, ::Nothing, i::Any) = x - -function check_pools(ŷ, y) - levels(y) == levels(ŷ[1]) || - error("Conflicting categorical 
pools found "* - "in observations and predictions. ") - return nothing -end - -function check_pools(ŷ, w::AbstractDict) - Set(levels(ŷ[1])) == Set(keys(w)) || - error("Conflicting categorical pools found "* - "in class weights and predictions. ") - return nothing -end - -# # INCLUDE SPECIFIC MEASURES AND TOOLS - -include("meta_utilities.jl") -include("roc.jl") -include("confusion_matrix.jl") -include("continuous.jl") -include("finite.jl") -include("probabilistic.jl") -include("loss_functions_interface.jl") - - -# # DEFAULT MEASURES - -default_measure(T, S) = _default_measure(T, nonmissingtype(S)) - -_default_measure(T, S) = nothing - -# Deterministic + Continuous / Count ==> RMS -function _default_measure( - ::Type{<:Deterministic}, - ::Type{<:Union{Vec{<:Continuous}, Vec{<:Count}}}, -) - return rms -end - -# Deterministic + Finite ==> Misclassification rate -function _default_measure( - ::Type{<:Deterministic}, - ::Type{<:Vec{<:Finite}}, -) - return misclassification_rate -end - -# Probabilistic + Finite / Count ==> log loss -function _default_measure( - ::Type{<:Probabilistic}, - ::Type{<:Union{Vec{<:Finite},Vec{<:Count}}}, -) - return log_loss -end - -# Probabilistic + Continuous ==> Log loss -function _default_measure( - ::Type{<:Probabilistic}, - ::Type{<:Vec{<:Continuous}}, -) - return log_loss -end - -function _default_measure( - ::Type{<:MMI.ProbabilisticDetector}, - ::Type{<:Vec{<:OrderedFactor{2}}}, -) - return area_under_curve -end - -function _default_measure( - ::Type{<:MMI.DeterministicDetector}, - ::Type{<:Vec{<:OrderedFactor{2}}}, -) - return balanced_accuracy -end - -# Fallbacks -default_measure(M::Type{<:Supervised}) = default_measure(M, target_scitype(M)) -default_measure(::M) where M <: Supervised = default_measure(M) - -default_measure(M::Type{<:Annotator}) = _default_measure(M, target_scitype(M)) -default_measure(::M) where M <: Annotator = default_measure(M) diff --git a/src/measures/meta_utilities.jl b/src/measures/meta_utilities.jl deleted 
file mode 100644 index 3b0de197..00000000 --- a/src/measures/meta_utilities.jl +++ /dev/null @@ -1,233 +0,0 @@ -const DOC_OBSERVATIONS = - "on predictions `ŷ`, "* - "given ground truth observations `y`. " -const DOC_WEIGHTS = - "Optionally specify per-sample weights, `w`. " -const DOC_CLASS_WEIGHTS = - "An optional `AbstractDict`, denoted `class_w` above, "* - "keyed on `levels(y)`, specifies class weights. " - -macro create_aliases(M_ex) - esc(quote - M = $M_ex - for alias in Symbol.(instances(M)) - # isdefined(parentmodule(M), alias) || eval(:(const $alias = $M())) - eval(:(const $alias = $M())) - end - end) -end - -function detailed_doc_string(M; typename="", body="", footer="", scitype="") - - _instances = _decorate(instances(M)) - human_name = MLJBase.human_name(M) - if isempty(scitype) - scitype = "`$(target_scitype(M))`" - end - - if isempty(typename) - ret = " $M\n\n" - else - ret = " MLJBase.$typename\n\n" - end - - ret *= "A measure type for $(human_name)" - isempty(_instances) || - (ret *= ", which includes the instance(s): "* - "$_instances") - ret *= ".\n\n" - ret *= " $(name(M))()(ŷ, y)\n" - supports_weights(M) && - (ret *= " $(name(M))()(ŷ, y, w)\n") - supports_class_weights(M) && - (ret *= " $(name(M))()(ŷ, y, class_w)\n") - ret *= "\n" - if isempty(fieldnames(M)) - ret *= "Evaluate the $(human_name) " - else - ret *= "Evaluate the default instance of $(name(M)) " - end - ret *= "$DOC_OBSERVATIONS" - supports_weights(M) && - (ret *= DOC_WEIGHTS) - supports_class_weights(M) && - (ret *= DOC_CLASS_WEIGHTS) - ret *= "\n\n" - isempty(body) || (ret *= "$body\n\n") - ret *= "Requires `scitype(y)` to be a subtype of $scitype; " - ret *= "`ŷ` must be an array of `$(prediction_type(M))` predictions. " - isempty(footer) ||(ret *= "\n\n$footer") - ret *= "\n\n" - ret *= "For more information, run `info($(name(M)))`. " - return ret -end - - -_err_create_docs() = error( - "@create_docs syntax error. 
Usage: \n"* - "@create_docs(MeasureType, typename=..., body=..., scitype=..., footer=...") -macro create_docs(M_ex, exs...) - M_ex isa Symbol || _err_create_docs() - t = "" - b = "" - s = "" - f = "" - for ex in exs - ex.head == :(=) || _err_create_docs() - ex.args[1] == :typename && (t = ex.args[2]) - ex.args[1] == :body && (b = ex.args[2]) - ex.args[1] == :scitype && (s = ex.args[2]) - ex.args[1] == :footer && (f = ex.args[2]) - end - esc(quote - "$(detailed_doc_string($M_ex, typename=$t, body=$b, scitype=$s, footer=$f))" - function $M_ex end - end) -end - -# TODO: I wonder why this is not a macro? - -""" - metadata_measure(T; kw...) - -Helper function to write the metadata (trait definitions) for a single -measure. - -### Compulsory keyword arguments - -- `target_scitype`: The allowed scientific type of `y` in `measure(ŷ, - y, ...)`. This is typically some abstract array. E.g, in single - target variable regression this is typically - `AbstractArray{<:Union{Missing,Continuous}}`. For a binary - classification metric insensitive to class order, this would - typically be `Union{AbstractArray{<:Union{Missing,Multiclass{2}}}, - AbstractArray{<:Union{Missing,OrderedFactor{2}}}}`, which has the - alias `FiniteArrMissing`. - -- `orientation`: Orientation of the measure. Use `:loss` when lower is - better and `:score` when higher is better. For example, set - `:loss` for root mean square and `:score` for area under the ROC - curve. - -- `prediction_type`: Refers to `ŷ` in `measure(ŷ, y, ...)` and should - be one of: `:deterministic` (`ŷ` has same type as `y`), - `:probabilistic` or `:interval`. - - -#### Optional keyword arguments - -The following have meaningful defaults but may still require -overloading: - -- `instances`: A vector of strings naming the built-in instances of - the measurement type provided by the implementation, which are - usually just common aliases for the default instance. 
E.g., for - `RSquared` has the `instances = ["rsq", "rsquared"]` which are both - defined as `RSquared()` in the implementation. `MulticlassFScore` - has the `instances = ["macro_f1score", "micro_f1score", - "multiclass_f1score"]`, where `micro_f1score = - MulticlassFScore(average=micro_avg)`, etc. Default is `String[]`. - -- `aggregation`: Aggregation method for measurements, typically - `Mean()` (for, e.g., mean absolute error) or `Sum()` (for number - of true positives). Default is `Mean()`. Must subtype - `StatisticalTraits.AggregationMode`. It is used to: - - - aggregate measurements in resampling (e.g., cross-validation) - - - aggregating per-observation measurements returned by `single` in - the fallback definition of `call` for `Unaggregated` measures - (such as area under the ROC curve). - -- `supports_weights`: Whether the measure can be called with - per-observation weights `w`, as in `l2(ŷ, y, w)`. Default is `true`. - -- `supports_class_weights`: Whether the measure can be called with a - class weight dictionary `w`, as in `micro_f1score(ŷ, y, w)`. Default - is `true`. Default is `false`. - -- `human_name`: Ordinary name of measure. Used in the full - auto-generated docstring, which begins "A measure type for - \$human_name ...". Eg, the `human_name` for `TruePositive` is `number - of true positives. Default is snake-case version of type name, with - underscores replaced by spaces; so `MeanAbsoluteError` becomes "mean - absolute error". - -- `docstring`: An abbreviated docstring, displayed by - `info(measure)`. Fallback uses `human_name` and lists the - `instances`. 
- -""" -function metadata_measure(T; name::String="", - human_name="", - instances::Vector{String}=String[], - target_scitype=Unknown, - prediction_type::Symbol=:unknown, - orientation::Symbol=:unknown, - aggregation=Mean(), - is_feature_dependent::Bool=false, - supports_weights::Bool=true, - supports_class_weights::Bool=false, - docstring::String="", - distribution_type=Unknown) - pred_str = "$prediction_type" - orientation_str = "$orientation" -# dist = ifelse(ismissing(distribution_type), missing, "$distribution_type") - ex = quote - - # traits common with models: - if !isempty($name) - StatisticalTraits.name(::Type{<:$T}) = $name - end - if !isempty($docstring) - StatisticalTraits.docstring(::Type{<:$T}) = $docstring - end - StatisticalTraits.target_scitype(::Type{<:$T}) = $target_scitype - StatisticalTraits.prediction_type(::Type{<:$T}) = Symbol($pred_str) - StatisticalTraits.supports_weights(::Type{<:$T}) = $supports_weights - - # traits specific to measures: - if !isempty($instances) - StatisticalTraits.instances(::Type{<:$T}) = $instances - end - if !isempty($human_name) - StatisticalTraits.human_name(::Type{<:$T}) = $human_name - end - StatisticalTraits.orientation(::Type{<:$T}) = Symbol($orientation_str) - StatisticalTraits.aggregation(::Type{<:$T}) = $aggregation - StatisticalTraits.is_feature_dependent(::Type{<:$T}) = - $is_feature_dependent - StatisticalTraits.supports_class_weights(::Type{<:$T}) = - $supports_class_weights - StatisticalTraits.distribution_type(::Type{<:$T}) = $distribution_type - - end - parentmodule(T).eval(ex) -end - -""" - - measures_for_export() - -Return a list of the symbolic representation of all: - -- measure types (subtypes of `Aggregated` and `Unaggregated`) measure - -- type aliases (as defined by the constant - `MLJBase.MEASURE_TYPE_ALIASES`) - -- all built-in measure instances (as declared by `instances` trait) - -""" -function measures_for_export() - ret = MLJBase.MEASURE_TYPE_ALIASES - for m in measures() - name = 
m.name |> Symbol - push!(ret, name) - for instance in m.instances - alias = Symbol(instance) - push!(ret, alias) - end - end - return ret -end diff --git a/src/measures/probabilistic.jl b/src/measures/probabilistic.jl deleted file mode 100644 index 11c3bcdf..00000000 --- a/src/measures/probabilistic.jl +++ /dev/null @@ -1,423 +0,0 @@ -const DOC_DISTRIBUTIONS = -""" -In the case the predictions `ŷ` are continuous probability -distributions, such as `Distributions.Normal`, replace the above sum -with an integral, and interpret `p` as the probablity density -function. In case of discrete distributions over the integers, such as -`Distributions.Poisson`, sum over all integers instead of `C`. -""" -const WITH_L2NORM_CONTINUOUS = - [@eval(Distributions.$d) for d in [ - :Chisq, - :Gamma, - :Beta, - :Chi, - :Cauchy, - :Normal, - :Uniform, - :Logistic, - :Exponential]] - -const WITH_L2NORM_COUNT = - [@eval(Distributions.$d) for d in [ - :Poisson, - :DiscreteUniform, - :DiscreteNonParametric]] - -const WITH_L2NORM = vcat([UnivariateFinite, ], - WITH_L2NORM_CONTINUOUS, - WITH_L2NORM_COUNT) - -const UD = Distributions.UnivariateDistribution - -# ======================================================== -# AGGREGATED MEASURES - -# --------------------------------------------------------- -# AreaUnderCurve - -# Implementation based on the Mann-Whitney U statistic. 
-# see https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve -# and https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Area_under_curve_(AUC)_statistic_for_ROC_curves - - -struct AreaUnderCurve <: Aggregated end - -metadata_measure(AreaUnderCurve; - human_name = "area under the ROC", - instances = ["area_under_curve", "auc"], - target_scitype = FiniteArrMissing{2}, - prediction_type = :probabilistic, - orientation = :score, - supports_weights = false, - distribution_type = UnivariateFinite) - -const AUC = AreaUnderCurve -@create_aliases AreaUnderCurve - -@create_docs(AreaUnderCurve, -body= -""" -Returns the area under the ROC ([receiver operator -characteristic](https://en.wikipedia.org/wiki/Receiver_operating_characteristic)) - -If `missing` or `NaN` values are present, use `auc(skipinvalid(yhat, y)...)`. - -$INVARIANT_LABEL -""", -scitpye = DOC_FINITE_BINARY) - -# core algorithm: -function _auc(ŷ, y) - lab_pos = classes(ŷ)[2] # 'positive' label - scores = pdf.(ŷ, lab_pos) # associated scores - ranks = StatsBase.tiedrank(scores) - n = length(y) - n_neg = 0 # to keep of the number of negative preds - T = eltype(ranks) - R_pos = zero(T) # sum of positive ranks - @inbounds for (i,j) in zip(eachindex(y), eachindex(ranks)) - if y[i] == lab_pos - R_pos += ranks[j] - else - n_neg += 1 - end - end - n_pos = n - n_neg # number of positive predictions - U = R_pos - T(0.5)*n_pos*(n_pos + 1) # Mann-Whitney U statistic - return U / (n_neg * n_pos) -end - -# Missing values not supported, but allow `Missing` in eltype, because -# `skipinvalid(yhat, y)` does not tighten the type. See doc string above. 
- -call(::AUC, ŷ, y) = _auc(ŷ, y) - -# ======================================================== -# UNAGGREGATED MEASURES - -# --------------------------------------------------------------------- -# LogScore - -struct LogScore{R <: Real} <: Unaggregated - tol::R -end -LogScore(;eps=eps(), tol=eps) = LogScore(tol) - -metadata_measure(LogScore; - instances = ["log_score", ], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :score, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases LogScore - -@create_docs(LogScore, -body= -""" -Since the score is undefined in the case that the true observation is -predicted to occur with probability zero, probablities are clamped -between `tol` and `1-tol`, where `tol` is a constructor key-word -argument. - -If `p` is the predicted probability mass or density function -corresponding to a *single* ground truth observation `η`, then the -score for that example is - - log(clamp(p(η), tol), 1 - tol) - -For example, for a binary target with "yes"/"no" labels, and -predicted probability of "yes" equal to 0.8, an observation of "no" -scores `log(0.2)`. - -The predictions `ŷ` should be an array of `UnivariateFinite` -distributions in the case of `Finite` target `y`, and otherwise a -supported `Distributions.UnivariateDistribution` such as `Normal` or -`Poisson`. - -See also [`LogLoss`](@ref), which differs only in sign. 
-""", -scitype=DOC_MULTI) - -# for single finite observation: -single(c::LogScore, d::UnivariateFinite, η) = - log(clamp(pdf(d, η), c.tol, 1 - c.tol)) - -# for a single infinite observation: -single(c::LogScore, d::Distributions.UnivariateDistribution, η) = - log(clamp(pdf(d, η), c.tol, 1 - c.tol)) - -# to resolve method ambiguities: -single(::LogScore, ::UnivariateFinite, ::Missing) = missing -single(::LogScore, ::Distributions.UnivariateDistribution, ::Missing) = missing -single(::LogScore, ::Missing, ::Missing) = missing - -# performant broadasting in case of UnivariateFiniteArray: -call(c::LogScore, ŷ::UnivariateFiniteArray, y) = - log.(clamp.(broadcast(pdf, ŷ, y), c.tol, 1 - c.tol)) -call(c::LogScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = call(c, ŷ, y) .* w - -# --------------------------------------------------------------------- -# LogLoss - -struct LogLoss{R <: Real} <: Unaggregated - tol::R -end -LogLoss(;eps=eps(), tol=eps) = LogLoss(tol) - -metadata_measure(LogLoss; - instances = ["log_loss", "cross_entropy"], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :loss, - distribution_type = Union{WITH_L2NORM...}) - -const CrossEntropy = LogLoss -@create_aliases LogLoss - -@create_docs(LogLoss, -body= -""" -For details, see [`LogScore`](@ref), which differs only by a sign. 
-""", -scitype=DOC_MULTI) - -# for single observation: -single(c::LogLoss, d, η) = -single(LogScore(tol=c.tol), d, η) - -# to get performant broadasting in case of UnivariateFiniteArray: -call(c::LogLoss, ŷ::UnivariateFiniteArray, y) = - -call(LogScore(tol=c.tol), ŷ, y) -call(c::LogLoss, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = - -call(LogScore(tol=c.tol), ŷ, y, w) - - -# ----------------------------------------------------- -# BrierScore - -struct BrierScore <: Unaggregated end - -metadata_measure(BrierScore; - human_name = "Brier score (a.k.a. quadratic score)", - instances = ["brier_score",], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :score, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases BrierScore - -@create_docs(BrierScore, -body= -""" -Convention as in $PROPER_SCORING_RULES - -*Finite case.* If `p` is the predicted probability mass function for a -*single* observation `η`, and `C` all possible classes, then the -corresponding score for that observation is given by - -``2p(η) - \\left(\\sum_{c ∈ C} p(c)^2\\right) - 1`` - -*Warning.* `BrierScore()` is a "score" in the sense that bigger is -better (with `0` optimal, and all other values negative). In Brier's -original 1950 paper, and many other places, it has the opposite sign, -despite the name. Moreover, the present implementation does not treat -the binary case as special, so that the score may differ in the binary -case by a factor of two from usage elsewhere. - -*Infinite case.* Replacing the sum above with an integral does *not* -lead to the formula adopted here in the case of `Continuous` or -`Count` target `y`. 
Rather the convention in the paper cited above is -adopted, which means returning a score of - -``2p(η) - ∫ p(t)^2 dt`` - -in the `Continuous` case (`p` the probablity density function) or - -``2p(η) - ∑_t p(t)^2`` - -in the `Count` cae (`p` the probablity mass function). -""", -scitype=DOC_MULTI) - -# calling on single finite observation: -function single(::BrierScore, - d::UnivariateFinite, - η) - levels = classes(d) - pvec = broadcast(pdf, d, levels) - offset = 1 + sum(pvec.^2) - return 2 * pdf(d, η) - offset -end - -# calling on a single infinite observation: -single(::BrierScore, d::Distributions.UnivariateDistribution, η) = - 2*pdf(d, η) - Distributions.pdfsquaredL2norm(d) - -# To get performant broadcasted version in case of UnivariateFiniteArray: -function call( - ::BrierScore, - ŷ::UnivariateFiniteArray, - y - ) - - probs = pdf(ŷ, classes(first(ŷ))) - offset = 1 .+ vec(sum(probs.^2, dims=2)) - - 2 .* broadcast(pdf, ŷ, y) .- offset -end -call(m::BrierScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = call(m, ŷ, y) .* w - - -# ----------------------------------------------------- -# BrierLoss - -struct BrierLoss <: Unaggregated end - -metadata_measure(BrierLoss; - human_name = "Brier loss (a.k.a. quadratic loss)", - instances = ["brier_loss",], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :loss, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases BrierLoss - -@create_docs(BrierLoss, -body= -""" -For details, see [`BrierScore`](@ref), which differs only by a sign. 
-""", -scitype=DOC_MULTI) - -# calling on single observation: -single(::BrierLoss, d, η) = - single(BrierScore(), d, η) - -# to get performant broadcasting in case of UnivariateFiniteArray: -call(m::BrierLoss, ŷ::UnivariateFiniteArray, y) = - -call(BrierScore(), ŷ, y) -call(m::BrierLoss, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = - -call(BrierScore(), ŷ, y, w) - - -# ----------------------------------------------------- -# SphericalScore - -struct SphericalScore{T<:Real} <: Unaggregated - alpha::T -end -SphericalScore(; alpha=2) = SphericalScore(alpha) - -metadata_measure(SphericalScore; - human_name = "Spherical score", - instances = ["spherical_score",], - target_scitype = Union{ - Arr{<:Union{Missing,Multiclass}}, - Arr{<:Union{Missing,OrderedFactor}}, - Arr{<:Union{Missing,Continuous}}, - Arr{<:Union{Missing,Count}}}, - prediction_type = :probabilistic, - orientation = :score, - distribution_type = Union{WITH_L2NORM...}) - -@create_aliases SphericalScore - -@create_docs(SphericalScore, -body= -""" -Convention as in $PROPER_SCORING_RULES: If `η` takes on a finite -number of classes `C` and ``p(η)` is the predicted probability for a -*single* observation `η`, then the corresponding score for that -observation is given by - -``p(y)^α / \\left(\\sum_{η ∈ C} p(η)^α\\right)^{1-α} - 1`` - -where `α` is the measure parameter `alpha`. 
- -$DOC_DISTRIBUTIONS - -""", -scitype=DOC_MULTI) - -# calling on single observations: -function single(s::SphericalScore, d::UnivariateFinite, η) - α = s.alpha - levels = classes(d) - pvec = broadcast(pdf, d, levels) - return (pdf(d, η)/norm(pvec, α))^(α - 1) -end - -single(s::SphericalScore, d::Distributions.UnivariateDistribution, η) = - pdf(d, η)/sqrt(Distributions.pdfsquaredL2norm(d)) - -# to compute the α-norm along last dimension: -_norm(A::AbstractArray{<:Any,N}, α) where N = - sum(x -> x^α, A, dims=N).^(1/α) - -# To get performant version in case of UnivariateFiniteArray: -function call( - s::SphericalScore, - ŷ::UnivariateFiniteArray, - y - ) - α = s.alpha - alphanorm(A) = _norm(A, α) - - predicted_probs = pdf(ŷ, classes(first(ŷ))) - - (broadcast(pdf, ŷ, y) ./ alphanorm(predicted_probs)).^(α - 1) -end -call(s::SphericalScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = - call(s, ŷ, y) .* w - - -# --------------------------------------------------------------------------- -# Extra check for L2 norm based proper scoring rules - -err_l2_norm(m) = ArgumentError( - "Distribution not supported by $m. "* - "Supported distributions are "* - join(string.(map(s->"`$s`", WITH_L2NORM)), ", ", ", and ")) - -const ERR_UNSUPPORTED_ALPHA = ArgumentError( - "Only `alpha = 2` is supported, unless scoring a `Finite` target. ") - -# not for export: -const L2ProperScoringRules = Union{LogScore, - LogLoss, - BrierScore, - BrierLoss, - SphericalScore} - -function extra_check(measure::L2ProperScoringRules, yhat, args...) 
- - D = nonmissing(eltype(yhat)) - D <: Distributions.Distribution || D <: UnivariateFinite || - (D = typeof(findfirst(x->!isinvalid(x), yhat))) - D <: Union{Nothing, WITH_L2NORM...} || - throw(err_l2_norm(measure)) - - if measure isa SphericalScore - measure.alpha == 2 || throw(ERR_UNSUPPORTED_ALPHA) - end - - return nothing -end diff --git a/src/measures/roc.jl b/src/measures/roc.jl deleted file mode 100644 index 8614b00e..00000000 --- a/src/measures/roc.jl +++ /dev/null @@ -1,91 +0,0 @@ -## ROC COMPUTATION - -""" - _idx_unique_sorted(v) - -Internal function to return the index of unique elements in `v` under the -assumption that the vector `v` is sorted in decreasing order. -""" -function _idx_unique_sorted(v::Vec{<:Real}) - n = length(v) - idx = ones(Int, n) - p, h = 1, 1 - cur = v[1] - @inbounds while h < n - h += 1 # head position - cand = v[h] # candidate value - cand < cur || continue # is it new? otherwise skip - p += 1 # if new store it - idx[p] = h - cur = cand # and update the last seen value - end - p < n && deleteat!(idx, p+1:n) - return idx -end - -""" - fprs, tprs, ts = roc_curve(ŷ, y) = roc(ŷ, y) - -Return the ROC curve for a two-class probabilistic prediction `ŷ` given the -ground truth `y`. The true positive rates, false positive rates over a range -of thresholds `ts` are returned. Note that if there are `k` unique scores, -there are correspondingly `k` thresholds and `k+1` "bins" over which the FPR -and TPR are constant: - -* `[0.0 - thresh[1]]` -* `[thresh[1] - thresh[2]]` -* ... -* `[thresh[k] - 1]` - -consequently, `tprs` and `fprs` are of length `k+1` if `ts` is of length `k`. - -To draw the curve using your favorite plotting backend, do `plot(fprs, tprs)`. 
-""" -function roc_curve(ŷm, ym) - ŷ, y = skipinvalid(ŷm, ym) - length(classes(ŷ)) == 2 || throw( - ArgumentError("`ŷ` must be a two-class probabilistic prediction") - ) - length(levels(y)) == 2 || throw( - ArgumentError("`y` must be a categorical vector with two-levels.") - ) - n = length(y) - lab_pos = levels(y)[2] - scores = pdf.(ŷ, lab_pos) - ranking = sortperm(scores, rev=true) - - scores_sort = scores[ranking] - y_sort_bin = (y[ranking] .== lab_pos) - - idx_unique = _idx_unique_sorted(scores_sort) - thresholds = scores_sort[idx_unique] - - # detailed computations with example: - # y = [ 1 0 0 1 0 0 1] - # s = [0.5 0.5 0.2 0.2 0.1 0.1 0.1] thresh are 0.5 0.2, 0.1 // idx [1, 3, 5] - # ŷ = [ 0 0 0 0 0 0 0] (0.5 - 1.0] # no pos pred - # ŷ = [ 1 1 0 0 0 0 0] (0.2 - 0.5] # 2 pos pred - # ŷ = [ 1 1 1 1 0 0 0] (0.1 - 0.2] # 4 pos pred - # ŷ = [ 1 1 1 1 1 1 1] [0.0 - 0.1] # all pos pre - - idx_unique_2 = idx_unique[2:end] # [3, 5] - n_ŷ_pos = idx_unique_2 .- 1 # [2, 4] implicit [0, 2, 4, 7] - - cs = cumsum(y_sort_bin) # [1, 1, 1, 2, 2, 2, 3] - n_tp = cs[n_ŷ_pos] # [1, 2] implicit [0, 1, 2, 3] - n_fp = n_ŷ_pos .- n_tp # [1, 2] implicit [0, 1, 2, 4] - - # add end points - P = sum(y_sort_bin) # total number of true positives - N = n - P # total number of true negatives - - n_tp = [0, n_tp..., P] # [0, 1, 2, 3] - n_fp = [0, n_fp..., N] # [0, 1, 2, 4] - - tprs = n_tp ./ P # [0/3, 1/3, 2/3, 1] - fprs = n_fp ./ N # [0/4, 1/4, 2/4, 1] - - return fprs, tprs, thresholds -end - -const roc = roc_curve diff --git a/src/operations.jl b/src/operations.jl index efa275ac..9fab3999 100644 --- a/src/operations.jl +++ b/src/operations.jl @@ -168,43 +168,7 @@ const err_unsupported_operation(operation) = ErrorException( "network machine that does not support it. 
" ) -## SURROGATE AND COMPOSITE MODELS - - -for operation in [:predict, - :predict_joint, - :transform, - :inverse_transform] - ex = quote - function $operation(model::Union{Composite,Surrogate}, fitresult,X) - if hasproperty(fitresult, $(QuoteNode(operation))) - return fitresult.$operation(X) - else - throw(err_unsupported_operation($operation)) - end - end - end - eval(ex) -end - -for (operation, fallback) in [(:predict_mode, :mode), - (:predict_mean, :mean), - (:predict_median, :median)] - ex = quote - function $(operation)(m::Union{ProbabilisticComposite,ProbabilisticSurrogate}, - fitresult, - Xnew) - if hasproperty(fitresult, $(QuoteNode(operation))) - return fitresult.$(operation)(Xnew) - end - return $(fallback).(predict(m, fitresult, Xnew)) - end - end - eval(ex) -end - - -## NETWORKCOMPOSITE MODELS +## NETWORK COMPOSITE MODELS # In the case of `NetworkComposite` models, the `fitresult` is a learning network # signature. If we call a node in the signature (eg, do `fitresult.predict()`) then we may @@ -222,9 +186,9 @@ for operation in [:predict, :transform, :inverse_transform] quote - function $operation(model::NetworkComposite, fitresult, Xnew) + function $operation(model::NetworkComposite, fitresult, Xnew...) if $(QuoteNode(operation)) in MLJBase.operations(fitresult) - return output_and_report(fitresult, $(QuoteNode(operation)), Xnew) + return output_and_report(fitresult, $(QuoteNode(operation)), Xnew...) 
end throw(err_unsupported_operation($operation)) end @@ -242,7 +206,7 @@ for (operation, fallback) in [(:predict_mode, :mode), return output_and_report(fitresult, $(QuoteNode(operation)), Xnew) end # The following line retuns a `Tuple` since `m` is a `NetworkComposite` - predictions, report = predict(m, fitresult, Xnew) + predictions, report = predict(m, fitresult, Xnew) return $(fallback).(predictions), report end end |> eval diff --git a/src/resampling.jl b/src/resampling.jl index 43483cc3..6b055951 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -14,8 +14,6 @@ const PREDICT_OPERATIONS_STRING = begin join(strings, ", ", ", or ") end const PROG_METER_DT = 0.1 -const ERR_WEIGHTS_REAL = - ArgumentError("`weights` must be a `Real` vector. ") const ERR_WEIGHTS_LENGTH = DimensionMismatch("`weights` and target "* "have different lengths. ") @@ -32,19 +30,41 @@ const ERR_INVALID_OPERATION = ArgumentError( "Invalid `operation` or `operations`. "* "An operation must be one of these: $PREDICT_OPERATIONS_STRING. ") _ambiguous_operation(model, measure) = - "`prediction_type($measure) == $(prediction_type(measure))` but "* - "`prediction_type($model) == $(prediction_type(model))`." + "`$measure` does not support a `model` with "* + "`prediction_type(model) == :$(prediction_type(model))`. " err_ambiguous_operation(model, measure) = ArgumentError( _ambiguous_operation(model, measure)* - "\nUnable to deduce an appropriate operation for $measure. "* + "\nUnable to infer an appropriate operation for `$measure`. "* "Explicitly specify `operation=...` or `operations=...`. ") err_incompatible_prediction_types(model, measure) = ArgumentError( _ambiguous_operation(model, measure)* - "If your model really is making probabilistic predictions, try explicitly "* + "If your model is truly making probabilistic predictions, try explicitly "* "specifiying operations. For example, for "* "`measures = [area_under_curve, accuracy]`, try "* "`operations=[predict, predict_mode]`. 
") - +const LOG_AVOID = "\nTo override measure checks, set check_measure=false. " +const LOG_SUGGESTION1 = + "\nPerhaps you want to set `operation="* + "predict_mode` or need to "* + "specify multiple operations, "* + "one for each measure. " +const LOG_SUGGESTION2 = + "\nPerhaps you want to set `operation="* + "predict_mean` or `operation=predict_median`, or "* + "specify multiple operations, "* + "one for each measure. " +ERR_MEASURES_OBSERVATION_SCITYPE(measure, T_measure, T) = ArgumentError( + "\nobservation scitype of target = `$T` but ($measure) only supports "* + "`$T_measure`."*LOG_AVOID +) +ERR_MEASURES_PROBABILISTIC(measure, suggestion) = ArgumentError( + "The model subtypes `Probabilistic`, and so is not supported by "* + "`$measure`. $suggestion"*LOG_AVOID +) +ERR_MEASURES_DETERMINISTIC(measure) = ArgumentError( + "The model subtypes `Deterministic`, "* + "and so is not supported by `$measure`. "*LOG_AVOID +) # ================================================================== ## MODEL TYPES THAT CAN BE EVALUATED @@ -345,7 +365,7 @@ For example, if you run `replace!(y, 'a' => 'b', 'b' => 'a')` and then re-run `train_test_pairs`, the returned `(train, test)` pairs will be the same. Pre-shuffling of `rows` is controlled by `rng` and `shuffle`. If `rng` -is an integer, then the `StratifedCV` keyword constructor resets it to +is an integer, then the `StratifiedCV` keyword constructor resets it to `MersenneTwister(rng)`. Otherwise some `AbstractRNG` object is expected. @@ -448,72 +468,68 @@ end """ PerformanceEvaluation -Type of object returned by [`evaluate`](@ref) (for models plus data) -or [`evaluate!`](@ref) (for machines). Such objects encode estimates -of the performance (generalization error) of a supervised model or -outlier detection model. - -When `evaluate`/`evaluate!` is called, a number of train/test pairs -("folds") of row indices are generated, according to the options -provided, which are discussed in the [`evaluate!`](@ref) -doc-string.
Rows correspond to observations. The generated train/test -pairs are recorded in the `train_test_rows` field of the -`PerformanceEvaluation` struct, and the corresponding estimates, -aggregated over all train/test pairs, are recorded in `measurement`, a -vector with one entry for each measure (metric) recorded in `measure`. - -When displayed, a `PerformanceEvalution` object includes a value under -the heading `1.96*SE`, derived from the standard error of the `per_fold` -entries. This value is suitable for constructing a formal 95% -confidence interval for the given `measurement`. Such intervals should -be interpreted with caution. See, for example, Bates et al. -[(2021)](https://arxiv.org/abs/2104.00673). +Type of object returned by [`evaluate`](@ref) (for models plus data) or +[`evaluate!`](@ref) (for machines). Such objects encode estimates of the performance +(generalization error) of a supervised model or outlier detection model. + +When `evaluate`/`evaluate!` is called, a number of train/test pairs ("folds") of row +indices are generated, according to the options provided, which are discussed in the +[`evaluate!`](@ref) doc-string. Rows correspond to observations. The generated train/test +pairs are recorded in the `train_test_rows` field of the `PerformanceEvaluation` struct, +and the corresponding estimates, aggregated over all train/test pairs, are recorded in +`measurement`, a vector with one entry for each measure (metric) recorded in `measure`. + +When displayed, a `PerformanceEvaluation` object includes a value under the heading +`1.96*SE`, derived from the standard error of the `per_fold` entries. This value is +suitable for constructing a formal 95% confidence interval for the given +`measurement`. Such intervals should be interpreted with caution. See, for example, Bates +et al. [(2021)](https://arxiv.org/abs/2104.00673). ### Fields -These fields are part of the public API of the `PerformanceEvaluation` -struct.
+These fields are part of the public API of the `PerformanceEvaluation` struct. - `model`: model used to create the performance evaluation. In the case a tuning model, this is the best model found. - `measure`: vector of measures (metrics) used to evaluate performance -- `measurement`: vector of measurements - one for each element of - `measure` - aggregating the performance measurements over all - train/test pairs (folds). The aggregation method applied for a given - measure `m` is `aggregation(m)` (commonly `Mean` or `Sum`) +- `measurement`: vector of measurements - one for each element of `measure` - aggregating + the performance measurements over all train/test pairs (folds). The aggregation method + applied for a given measure `m` is + `StatisticalMeasuresBase.external_aggregation_mode(m)` (commonly `Mean()` or `Sum()`) -- `operation` (e.g., `predict_mode`): the operations applied for each - measure to generate predictions to be evaluated. Possibilities are: - $PREDICT_OPERATIONS_STRING. +- `operation` (e.g., `predict_mode`): the operations applied for each measure to generate + predictions to be evaluated. Possibilities are: $PREDICT_OPERATIONS_STRING. -- `per_fold`: a vector of vectors of individual test fold evaluations - (one vector per measure). Useful for obtaining a rough estimate of - the variance of the performance estimate. +- `per_fold`: a vector of vectors of individual test fold evaluations (one vector per + measure). Useful for obtaining a rough estimate of the variance of the performance + estimate. -- `per_observation`: a vector of vectors of individual observation - evaluations of those measures for which - `reports_each_observation(measure)` is true, which is otherwise - reported `missing`. Useful for some forms of hyper-parameter - optimization. 
+- `per_observation`: a vector of vectors of vectors containing individual per-observation + measurements: for an evaluation `e`, `e.per_observation[m][f][i]` is the measurement for + the `i`th observation in the `f`th test fold, evaluated using the `m`th measure. Useful + for some forms of hyper-parameter optimization. Note that an aggregated measurement + for some measure `measure` is repeated across all observations in a fold if + `StatisticalMeasures.can_report_unaggregated(measure) == false`. If `e` has been computed + with the `per_observation=false` option, then `e.per_observation` is a vector of + `missing`s. -- `fitted_params_per_fold`: a vector containing `fitted params(mach)` - for each machine `mach` trained during resampling - one machine per - train/test pair. Use this to extract the learned parameters for each - individual training event. +- `fitted_params_per_fold`: a vector containing `fitted_params(mach)` for each machine + `mach` trained during resampling - one machine per train/test pair. Use this to extract + the learned parameters for each individual training event. -- `report_per_fold`: a vector containing `report(mach)` for each - machine `mach` training in resampling - one machine per train/test - pair. +- `report_per_fold`: a vector containing `report(mach)` for each machine `mach` trained + in resampling - one machine per train/test pair. -- `train_test_rows`: a vector of tuples, each of the form `(train, test)`, - where `train` and `test` are vectors of row (observation) indices for - training and evaluation respectively. +- `train_test_rows`: a vector of tuples, each of the form `(train, test)`, where `train` + and `test` are vectors of row (observation) indices for training and evaluation + respectively. - `resampling`: the resampling strategy used to generate the train/test pairs. - `repeats`: the number of times the resampling strategy was repeated.
+ """ struct PerformanceEvaluation{M, Measure, @@ -617,48 +633,37 @@ end function _check_measure(measure, operation, model, y) - T = scitype(y) + # get observation scitype: + T = MLJBase.guess_observation_scitype(y) + + # get type supported by measure: + T_measure = StatisticalMeasuresBase.observation_scitype(measure) T == Unknown && (return true) - target_scitype(measure) == Unknown && (return true) - prediction_type(measure) == :unknown && (return true) + T_measure == Union{} && (return true) + isnothing(StatisticalMeasuresBase.kind_of_proxy(measure)) && (return true) - avoid = "\nTo override measure checks, set check_measure=false. " - T <: target_scitype(measure) || - throw(ArgumentError( - "\nscitype of target = $T but target_scitype($measure) = "* - "$(target_scitype(measure))."*avoid)) + T <: T_measure || throw(ERR_MEASURES_OBSERVATION_SCITYPE(measure, T_measure, T)) incompatible = model isa Probabilistic && operation == predict && - prediction_type(measure) != :probabilistic + StatisticalMeasuresBase.kind_of_proxy(measure) != LearnAPI.Distribution() if incompatible - if target_scitype(measure) <: - AbstractVector{<:Union{Missing,Finite}} - suggestion = "\nPerhaps you want to set `operation="* - "predict_mode` or need to "* - "specify multiple operations, "* - "one for each measure. " - elseif target_scitype(measure) <: - AbstractVector{<:Union{Missing,Continuous}} - suggestion = "\nPerhaps you want to set `operation="* - "predict_mean` or `operation=predict_median`, or "* - "specify multiple operations, "* - "one for each measure. " + if T <: Union{Missing,Finite} + suggestion = LOG_SUGGESTION1 + elseif T <: Union{Missing,Infinite} + suggestion = LOG_SUGGESTION2 else suggestion = "" end - throw(ArgumentError( - "\n$model <: Probabilistic but prediction_type($measure) = "* - ":$(prediction_type(measure)). 
"*suggestion*avoid)) + throw(ERR_MEASURES_PROBABILISTIC(measure, suggestion)) end - model isa Deterministic && prediction_type(measure) != :deterministic && - throw(ArgumentError("$model <: Deterministic but "* - "prediction_type($measure) ="* - ":$(prediction_type(measure))."*avoid)) + model isa Deterministic && + StatisticalMeasuresBase.kind_of_proxy(measure) != LearnAPI.LiteralTarget() && + throw(ERR_MEASURES_DETERMINISTIC(measure)) return true @@ -682,13 +687,14 @@ function _actual_measures(measures, model) _measures = measures end - return _measures + # wrap in `robust_measure` to allow unsupported weights to be silently treated as + # uniform when invoked; `_check_measure` will throw appropriate warnings unless + # explicitly suppressed. + return StatisticalMeasuresBase.robust_measure.(_measures) end function _check_weights(weights, nrows) - weights isa AbstractVector{<:Real} || - throw(ERR_WEIGHTS_REAL) length(weights) == nrows || throw(ERR_WEIGHTS_LENGTH) return true @@ -741,21 +747,35 @@ function _actual_operations(operation::Nothing, verbosity) map(measures) do m - prediction_type = MLJBase.prediction_type(m) - target_scitype = MLJBase.target_scitype(m) + # `kind_of_proxy` is the measure trait corresponding to the `prediction_type` model + # trait. But its values are instances of LearnAPI.KindOfProxy, instead of + # symbols: + # + # `LearnAPI.LiteralTarget()` ~ `:deterministic` (`model isa Deterministic`) + # `LearnAPI.Distribution()` ~ `:probabilistic` (`model isa Probabilistic`) + # + kind_of_proxy = StatisticalMeasuresBase.kind_of_proxy(m) - if prediction_type === :unknown - return predict - end + # `observation_scitype` is the measure trait which we need to match the model + # `target_scitype` but the latter refers to the whole target `y`, not a single + # observation.
+ # + # One day, models will have their own `observation_scitype` + observation_scitype = StatisticalMeasuresBase.observation_scitype(m) + + # One day, models will implement LearnAPI and will get their own `kind_of_proxy` + # trait replacing `prediction_type` and `observation_scitype` trait replacing + # `target_scitype`. + + isnothing(kind_of_proxy) && (return predict) if MLJBase.prediction_type(model) === :probabilistic - if prediction_type === :probabilistic + if kind_of_proxy === LearnAPI.Distribution() return predict - elseif prediction_type === :deterministic - if target_scitype <: AbstractArray{<:Union{Missing,Finite}} + elseif kind_of_proxy === LearnAPI.LiteralTarget() + if observation_scitype <: Union{Missing,Finite} return predict_mode - elseif target_scitype <: - AbstractArray{<:Union{Missing,Continuous,Count}} + elseif observation_scitype <:Union{Missing,Infinite} return predict_mean else throw(err_ambiguous_operation(model, m)) @@ -764,19 +784,21 @@ function _actual_operations(operation::Nothing, throw(err_ambiguous_operation(model, m)) end elseif MLJBase.prediction_type(model) === :deterministic - if prediction_type === :probabilistic + if kind_of_proxy === LearnAPI.Distribution() throw(err_incompatible_prediction_types(model, m)) - elseif prediction_type === :deterministic + elseif kind_of_proxy === LearnAPI.LiteralTarget() return predict else throw(err_ambiguous_operation(model, m)) end - else - if prediction_type === :interval + elseif MLJBase.prediction_type(model) === :interval + if kind_of_proxy === LearnAPI.ConfidenceInterval() return predict else throw(err_ambiguous_operation(model, m)) end + else + throw(err_ambiguous_operation(model, m)) end end end @@ -820,158 +842,123 @@ _process_accel_settings(accel) = throw(ArgumentError("unsupported" * # -------------------------------------------------------------- # User interface points: `evaluate!` and `evaluate` +const RESAMPLING_STRATEGIES = subtypes(ResamplingStrategy) +const 
RESAMPLING_STRATEGIES_LIST = + join( + map(RESAMPLING_STRATEGIES) do s + name = split(string(s), ".") |> last + "`$name`" + end, + ", ", + " and ", + ) + """ log_evaluation(logger, performance_evaluation) -Log a performance evaluation to `logger`, an object specific to some logging -platform, such as mlflow. If `logger=nothing` then no logging is performed. -The method is called at the end of every call to `evaluate/evaluate!` using -the logger provided by the `logger` keyword argument. + +Log a performance evaluation to `logger`, an object specific to some logging platform, +such as mlflow. If `logger=nothing` then no logging is performed. The method is called at +the end of every call to `evaluate/evaluate!` using the logger provided by the `logger` +keyword argument. + # Implementations for new logging platforms -# -Julia interfaces to workflow logging platforms, such as mlflow (provided by -the MLFlowClient.jl interface) should overload -`log_evaluation(logger::LoggerType, performance_evaluation)`, -where `LoggerType` is a platform-specific type for logger objects. For an -example, see the implementation provided by the MLJFlow.jl package. + +Julia interfaces to workflow logging platforms, such as mlflow (provided by the +MLFlowClient.jl interface) should overload `log_evaluation(logger::LoggerType, +performance_evaluation)`, where `LoggerType` is a platform-specific type for logger +objects. For an example, see the implementation provided by the MLJFlow.jl package. 
+ """ log_evaluation(logger, performance_evaluation) = nothing """ - evaluate!(mach, - resampling=CV(), - measure=nothing, - rows=nothing, - weights=nothing, - class_weights=nothing, - operation=nothing, - repeats=1, - acceleration=default_resource(), - force=false, - verbosity=1, - check_measure=true, - logger=nothing) - -Estimate the performance of a machine `mach` wrapping a supervised -model in data, using the specified `resampling` strategy (defaulting -to 6-fold cross-validation) and `measure`, which can be a single -measure or vector. - -Do `subtypes(MLJ.ResamplingStrategy)` to obtain a list of available -resampling strategies. If `resampling` is not an object of type -`MLJ.ResamplingStrategy`, then a vector of tuples (of the form -`(train_rows, test_rows)` is expected. For example, setting + evaluate!(mach; resampling=CV(), measure=nothing, options...) + +Estimate the performance of a machine `mach` wrapping a supervised model in data, using +the specified `resampling` strategy (defaulting to 6-fold cross-validation) and `measure`, +which can be a single measure or vector. Returns a [`PerformanceEvaluation`](@ref) +object. + +Available resampling strategies are $RESAMPLING_STRATEGIES_LIST. If `resampling` is not an +instance of one of these, then a vector of tuples of the form `(train_rows, test_rows)` +is expected. For example, setting resampling = [((1:100), (101:200)), ((101:200), (1:100))] gives two-fold cross-validation using the first 200 rows of data. -The type of operation (`predict`, `predict_mode`, etc) to be -associated with `measure` is automatically inferred from measure -traits where possible. For example, `predict_mode` will be used for a -`Multiclass` target, if `model` is probabilistic but `measure` is -deterministic. The operations applied can be inspected from the -`operation` field of the object returned. Alternatively, operations -can be explicitly specified using `operation=...`. 
If `measure` is a -vector, then `operation` must be a single operation, which will be -associated with all measures, or a vector of the same length as -`measure`. - -The resampling strategy is applied repeatedly (Monte Carlo resampling) -if `repeats > 1`. For example, if `repeats = 10`, then `resampling = -CV(nfolds=5, shuffle=true)`, generates a total of 50 `(train, test)` -pairs for evaluation and subsequent aggregation. +Any measure conforming to the +[StatisticalMeasuresBase.jl](https://juliaai.github.io/StatisticalMeasuresBase.jl/dev/) +API can be provided, assuming it can consume multiple observations. -If `resampling isa MLJ.ResamplingStrategy` then one may optionally -restrict the data used in evaluation by specifying `rows`. +Although `evaluate!` is mutating, `mach.model` and `mach.args` are not mutated. -An optional `weights` vector may be passed for measures that support -sample weights (`MLJ.supports_weights(measure) == true`), which is -ignored by those that don't. These weights are not to be confused with -any weights `w` bound to `mach` (as in `mach = machine(model, X, -y, w)`). To pass these to the performance evaluation measures you must -explictly specify `weights=w` in the `evaluate!` call. +# Additional keyword options -Additionally, optional `class_weights` dictionary may be passed -for measures that support class weights -(`MLJ.supports_class_weights(measure) == true`), which is -ignored by those that don't. These weights are not to be confused with -any weights `class_w` bound to `mach` (as in `mach = machine(model, X, -y, class_w)`). To pass these to the performance evaluation measures you -must explictly specify `class_weights=w` in the `evaluate!` call. +- `rows` - vector of observation indices from which both train and test folds are + constructed (default is all observations) -User-defined measures are supported; see the manual for details. 
+- `operation`/`operations=nothing` - One of $PREDICT_OPERATIONS_STRING, or a vector of + these of the same length as `measure`/`measures`. Automatically inferred if left + unspecified. For example, `predict_mode` will be used for a `Multiclass` target, if + `model` is a probabilistic predictor, but `measure` expects literal (point) target + predictions. Operations actually applied can be inspected from the `operation` field of + the object returned. -If no measure is specified, then `default_measure(mach.model)` is -used, unless this default is `nothing` and an error is thrown. +- `weights` - per-sample `Real` weights for measures that support them (not to be confused + with weights used in training, such as the `w` in `mach = machine(model, X, y, w)`). -The `acceleration` keyword argument is used to specify the compute resource (a -subtype of `ComputationalResources.AbstractResource`) that will be used to -accelerate/parallelize the resampling operation. +- `class_weights` - dictionary of `Real` per-class weights for use with measures that + support these, in classification problems (not to be confused + with weights used in training, such as the `w` in `mach = machine(model, X, y, w)`). -Although `evaluate!` is mutating, `mach.model` and `mach.args` are -untouched. +- `repeats::Int=1`: set to a higher value for repeated (Monte Carlo) + resampling. For example, if `repeats = 10`, then `resampling = CV(nfolds=5, + shuffle=true)`, generates a total of 50 `(train, test)` pairs for evaluation and + subsequent aggregation. -### Summary of key-word arguments +- `acceleration=CPU1()`: acceleration/parallelization option; can be any instance of + `CPU1` (single-threaded computation), `CPUThreads` (multi-threaded computation) or + `CPUProcesses` (multi-process computation); default is `default_resource()`. These types + are owned by ComputationalResources.jl.
-- `resampling` - resampling strategy (default is `CV(nfolds=6)`) - -- `measure`/`measures` - measure or vector of measures (losses, scores, etc) - -- `rows` - vector of observation indices from which both train and - test folds are constructed (default is all observations) - -- `weights` - per-sample weights for measures that support them (not - to be confused with weights used in training) - -- `class_weights` - dictionary of per-class weights for use with - measures that support these, in classification problems (not to be - confused with per-sample `weights` or with class weights used in - training) - -- `operation`/`operations` - One of $PREDICT_OPERATIONS_STRING, or a - vector of these of the same length as - `measure`/`measures`. Automatically inferred if left unspecified. - -- `repeats` - default is 1; set to a higher value for repeated - (Monte Carlo) resampling - -- `acceleration` - parallelization option; currently supported - options are instances of `CPU1` (single-threaded computation) - `CPUThreads` (multi-threaded computation) and `CPUProcesses` - (multi-process computation); default is `default_resource()`. - -- `force` - default is `false`; set to `true` for force cold-restart +- `force=false`: set to `true` to force cold-restart of each training event -- `verbosity` level, an integer defaulting to 1. - -- `check_measure` - default is `true` +- `verbosity::Int=1` logging level; can be negative -- `logger` - a logger object (see [`MLJBase.log_evaluation`](@ref)) +- `check_measure=true`: whether to screen measures for possible incompatibility with the + model. Will not catch all incompatibilities. +- `per_observation=true`: whether to calculate estimates for individual observations; if + `false` the `per_observation` field of the returned object is populated with + `missing`s. Setting to `false` may reduce compute time and allocations. 
-### Return value +- `logger` - a logger object (see [`MLJBase.log_evaluation`](@ref)) -A [`PerformanceEvaluation`](@ref) object. See -[`PerformanceEvaluation`](@ref) for details. +See also [`evaluate`](@ref), [`PerformanceEvaluation`](@ref) """ -function evaluate!(mach::Machine{<:Measurable}; - resampling=CV(), - measures=nothing, - measure=measures, - weights=nothing, - class_weights=nothing, - operations=nothing, - operation=operations, - acceleration=default_resource(), - rows=nothing, - repeats=1, - force=false, - check_measure=true, - verbosity=1, - logger=nothing) + +function evaluate!( + mach::Machine{<:Measurable}; + resampling=CV(), + measures=nothing, + measure=measures, + weights=nothing, + class_weights=nothing, + operations=nothing, + operation=operations, + acceleration=default_resource(), + rows=nothing, + repeats=1, + force=false, + check_measure=true, + per_observation=true, + verbosity=1, + logger=nothing, + ) # this method just checks validity of options, preprocess the # weights, measures, operations, and dispatches a @@ -1005,26 +992,52 @@ function evaluate!(mach::Machine{<:Measurable}; verbosity, check_measure) - _warn_about_unsupported(supports_weights, - "Sample", _measures, weights, verbosity) - _warn_about_unsupported(supports_class_weights, - "Class", _measures, class_weights, verbosity) + _warn_about_unsupported( + StatisticalMeasuresBase.supports_weights, + "Sample", + _measures, + weights, + verbosity, + ) + _warn_about_unsupported( + StatisticalMeasuresBase.supports_class_weights, + "Class", + _measures, + class_weights, + verbosity, + ) _acceleration= _process_accel_settings(acceleration) - evaluate!(mach, resampling, weights, class_weights, rows, verbosity, - repeats, _measures, _operations, _acceleration, force, logger, - resampling) - + evaluate!( + mach, + resampling, + weights, + class_weights, + rows, + verbosity, + repeats, + _measures, + _operations, + _acceleration, + force, + per_observation, + logger, + resampling, + ) 
end """ - evaluate(model, data...; cache=true, kw_options...) + evaluate(model, data...; cache=true, options...) Equivalent to `evaluate!(machine(model, data..., cache=cache); -wk_options...)`. See the machine version `evaluate!` for the complete +options...)`. See the machine version `evaluate!` for the complete list of options. +Returns a [`PerformanceEvaluation`](@ref) object. + +See also [`evaluate!`](@ref). + """ evaluate(model::Measurable, args...; cache=true, kwargs...) = evaluate!(machine(model, args...; cache=cache); kwargs...) @@ -1173,30 +1186,32 @@ const AbstractRow = Union{AbstractVector{<:Integer}, Colon} const TrainTestPair = Tuple{AbstractRow, AbstractRow} const TrainTestPairs = AbstractVector{<:TrainTestPair} -# helper: -_feature_dependencies_exist(measures) = - !all(m->!(is_feature_dependent(m)), measures) - -# helper: -function measure_specific_weights(measure, weights, class_weights, test) - supports_weights(measure) && supports_class_weights(measure) && - error("Encountered a measure that simultaneously supports "* - "(per-sample) weights and class weights. 
") - if supports_weights(measure) - weights === nothing && return nothing - return weights[test] - end - supports_class_weights(measure) && return class_weights - return nothing -end +_view(::Nothing, rows) = nothing +_view(weights, rows) = view(weights, rows) # Evaluation when `resampling` is a TrainTestPairs (CORE EVALUATOR): -# `user_resampling` keyword argument is the user defined resampling strategy -function evaluate!(mach::Machine, resampling, weights, class_weights, rows, - verbosity, repeats, measures, operations, acceleration, - force, logger, user_resampling) +function evaluate!( + mach::Machine, + resampling, + weights, + class_weights, + rows, + verbosity, + repeats, + measures, + operations, + acceleration, + force, + per_observation_flag, + logger, + user_resampling, + ) + + # Note: `user_resampling` keyword argument is the user-defined resampling strategy, + # while `resampling` is always a `TrainTestPairs`. - # Note: `rows` and `repeats` are ignored here + # Note: `rows` and `repeats` are only passed to the final `PeformanceEvaluation` + # object to be returned and are not otherwise used here. 
if !(resampling isa TrainTestPairs) error("`resampling` must be an "* @@ -1206,12 +1221,21 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows, X = mach.args[1]() y = mach.args[2]() + nrows = MLJBase.nrows(y) nfolds = length(resampling) + test_fold_sizes = map(resampling) do train_test_pair + test = last(train_test_pair) + test isa Colon && (return nrows) + length(test) + end - nmeasures = length(measures) + # weights used to aggregate per-fold measurements, which depends on a measures + # external mode of aggregation: + fold_weights(mode) = nfolds .* test_fold_sizes ./ sum(test_fold_sizes) + fold_weights(::StatisticalMeasuresBase.Sum) = nothing - feature_dependencies_exist = _feature_dependencies_exist(measures) + nmeasures = length(measures) function fit_and_extract_on_fold(mach, k) train, test = resampling[k] @@ -1220,21 +1244,27 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows, # that appear (`predict`, `predict_mode`, etc): yhat_given_operation = Dict(op=>op(mach, rows=test) for op in unique(operations)) - if feature_dependencies_exist - Xtest = selectrows(X, test) - else - Xtest = nothing - end - ytest = selectrows(y, test) - measurements = map(measures, operations) do m, op - wtest = measure_specific_weights( - m, - weights, - class_weights, - test - ) - value(m, yhat_given_operation[op], Xtest, ytest, wtest) + ytest = selectrows(y, test) + if per_observation_flag + measurements = map(measures, operations) do m, op + StatisticalMeasuresBase.measurements( + m, + yhat_given_operation[op], + ytest, + _view(weights, test), + class_weights, + ) + end + else + measurements = map(measures, operations) do m, op + m( + yhat_given_operation[op], + ytest, + _view(weights, test), + class_weights, + ) + end end fp = fitted_params(mach) @@ -1267,27 +1297,38 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows, measurements_flat = vcat(measurements_vector_of_vectors...) 
- # in the following rows=folds, columns=measures: + # In the `measurements_matrix` below, rows=folds, columns=measures; each element of + # the matrix is: + # + # - a vector of measurements, one per observation within a fold, if + # `per_observation_flag = true`; or + # + # - a single measurement for the whole fold, if `per_observation_flag = false`. + # measurements_matrix = permutedims( reshape(collect(measurements_flat), (nmeasures, nfolds)) ) # measurements for each observation: - per_observation = map(1:nmeasures) do k - m = measures[k] - if reports_each_observation(m) - measurements_matrix[:,k] - else - missing - end + per_observation = if per_observation_flag + map(1:nmeasures) do k + measurements_matrix[:,k] + end + else + fill(missing, nmeasures) end # measurements for each fold: - per_fold = map(1:nmeasures) do k - m = measures[k] - if reports_each_observation(m) - broadcast(MLJBase.aggregate, per_observation[k], [m,]) - else + per_fold = if per_observation_flag + map(1:nmeasures) do k + m = measures[k] + mode = StatisticalMeasuresBase.external_aggregation_mode(m) + map(per_observation[k]) do v + StatisticalMeasuresBase.aggregate(v; mode) + end + end + else + map(1:nmeasures) do k measurements_matrix[:,k] end end @@ -1295,7 +1336,12 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows, # overall aggregates: per_measure = map(1:nmeasures) do k m = measures[k] - MLJBase.aggregate(per_fold[k], m) + mode = StatisticalMeasuresBase.external_aggregation_mode(m) + StatisticalMeasuresBase.aggregate( + per_fold[k]; + mode, + weights=fold_weights(mode), + ) end evaluation = PerformanceEvaluation( @@ -1358,39 +1404,36 @@ end repeats = 1, acceleration=default_resource(), check_measure=true, - logger=nothing + per_observation=true, + logger=nothing, ) -Resampling model wrapper, used internally by the `fit` method of -`TunedModel` instances and `IteratedModel` instances. See -[`evaluate!](@ref) for options. Not intended for general use.
+Resampling model wrapper, used internally by the `fit` method of `TunedModel` instances +and `IteratedModel` instances. See [`evaluate!`](@ref) for options. Not intended for use by +general users, who will ordinarily use [`evaluate!`](@ref) directly. -Given a machine `mach = machine(resampler, args...)` one obtains a -performance evaluation of the specified `model`, performed according -to the prescribed `resampling` strategy and other parameters, using -data `args...`, by calling `fit!(mach)` followed by +Given a machine `mach = machine(resampler, args...)` one obtains a performance evaluation +of the specified `model`, performed according to the prescribed `resampling` strategy and +other parameters, using data `args...`, by calling `fit!(mach)` followed by `evaluate(mach)`. -On subsequent calls to `fit!(mach)` new train/test pairs of row -indices are only regenerated if `resampling`, `repeats` or `cache` -fields of `resampler` have changed. The evolution of an RNG field of -`resampler` does *not* constitute a change (`==` for `MLJType` objects -is not sensitive to such changes; see [`is_same_except'](@ref)). +On subsequent calls to `fit!(mach)` new train/test pairs of row indices are only +regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The +evolution of an RNG field of `resampler` does *not* constitute a change (`==` for +`MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)). -If there is single train/test pair, then warm-restart behavior of the -wrapped model `resampler.model` will extend to warm-restart behaviour -of the wrapper `resampler`, with respect to mutations of the wrapped -model. +If there is a single train/test pair, then warm-restart behavior of the wrapped model +`resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with +respect to mutations of the wrapped model.
-The sample `weights` are passed to the specified performance measures -that support weights for evaluation. These weights are not to be -confused with any weights bound to a `Resampler` instance in a -machine, used for training the wrapped `model` when supported. +The sample `weights` are passed to the specified performance measures that support weights +for evaluation. These weights are not to be confused with any weights bound to a +`Resampler` instance in a machine, used for training the wrapped `model` when supported. -The sample `class_weights` are passed to the specified performance -measures that support per-class weights for evaluation. These weights -are not to be confused with any weights bound to a `Resampler` instance -in a machine, used for training the wrapped `model` when supported. +The sample `class_weights` are passed to the specified performance measures that support +per-class weights for evaluation. These weights are not to be confused with any weights +bound to a `Resampler` instance in a machine, used for training the wrapped `model` when +supported. 
""" mutable struct Resampler{S, L} <: Model @@ -1404,6 +1447,7 @@ mutable struct Resampler{S, L} <: Model check_measure::Bool repeats::Int cache::Bool + per_observation::Bool logger::L end @@ -1433,18 +1477,21 @@ function MLJModelInterface.clean!(resampler::Resampler) return warning end -function Resampler(; - model=nothing, +function Resampler( + ;model=nothing, resampling=CV(), - measure=nothing, + measures=nothing, + measure=measures, weights=nothing, class_weights=nothing, - operation=predict, + operations=predict, + operation=operations, acceleration=default_resource(), check_measure=true, repeats=1, cache=true, - logger=nothing + per_observation=true, + logger=nothing, ) resampler = Resampler( model, @@ -1457,7 +1504,8 @@ function Resampler(; check_measure, repeats, cache, - logger + per_observation, + logger, ) message = MLJModelInterface.clean!(resampler) isempty(message) || @warn message @@ -1503,8 +1551,9 @@ function MLJModelInterface.fit(resampler::Resampler, verbosity::Int, args...) _operations, _acceleration, false, + resampler.per_observation, resampler.logger, - resampler.resampling + resampler.resampling, ) fitresult = (machine = mach, evaluation = e) @@ -1568,8 +1617,9 @@ function MLJModelInterface.update( operations, acceleration, false, + resampler.per_observation, resampler.logger, - resampler.resampling + resampler.resampling, ) report = (evaluation = e, ) fitresult = (machine=mach2, evaluation=e) diff --git a/src/utilities.jl b/src/utilities.jl index 66dd62b7..969fce4c 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -135,20 +135,6 @@ function recursive_setproperty!(obj, ex::Expr, value) return recursive_setproperty!(last_obj, field, value) end -""" - check_dimensions(X, Y) - -Internal function to check two arrays have the same shape. - -""" -@inline function check_dimensions(X, Y) - size(X) == size(Y) || - throw(DimensionMismatch( - "Encountered two objects with sizes $(size(X)) and "* - "$(size(Y)) which needed to match but don't. 
")) - return nothing -end - """ check_same_nrows(X, Y) @@ -469,3 +455,93 @@ end generate_name!(model, existing_names; kwargs...) = generate_name!(typeof(model), existing_names; kwargs...) + + +# # OBSERVATION VS CONTAINER HACKINGS TOOLS + +# The following tools are used to bridge the gap between old paradigm of prescribing +# the scitype of containers of observations, and the LearnAPI.jl paradigm of prescribing +# only the scitype of the observations themeselves. This is needed because measures are +# now taken from StatisticalMeasures.jl which follows the LearnAPI.jl paradigm, but model +# `target_scitype` refers to containers. + +""" + observation(S) + +*Private method.* + +Tries to infer the per-observation scitype from the scitype of `S`, when `S` is known to +be the scitype of some container with multiple observations; here we view the scitype for +one row of a table to be the scitype of the row converted to a vector. Return `Unknown` if +unable to draw reliable inferrence. + + +The observation scitype for a table is here understood as the scitype of a row converted +to a vector. + +""" +observation(::Type) = Unknown +observation(::Type{AbstractVector{S}}) where S = S +observation(::Type{AbstractArray{S,N}}) where {S,N} = AbstractArray{S,N-1} +for T in [:Continuous, :Count, :Finite, :Infinite, :Multiclass, :OrderedFactor] + TM = "Union{Missing,$T}" |> Meta.parse + for S in [T, TM] + quote + observation(::Type{AbstractVector{<:$S}}) = $S + observation(::Type{AbstractArray{<:$S,N}}) where N = AbstractArray{<:$S,N-1} + observation(::Type{Table{<:AbstractVector{<:$S}}}) = AbstractVector{<:$S} + end |> eval + end +end +# note that in Julia `f(::Type{AbstractVector{<:T}}) where T = T` has not a well-formed +# left-hand side + +""" + guess_observation_scitype(y) + +*Private method.* + +If `y` is an `AbstractArray`, return the scitype of `y[:, :, ..., :, 1]`. 
If `y` is a +table, return the scitype of the first row, converted to a vector, unless this row has +`missing` elements, in which case return `Unknown`. + +In all other cases, return `Unknown`. + +``` +julia> guess_observation_scitype([missing, 1, 2, 3]) +Union{Missing, Count} + +julia> guess_observation_scitype(rand(3, 2)) +AbstractVector{Continuous} + +julia> guess_observation_scitype((x=rand(3), y=rand(Bool, 3))) +AbstractVector{Union{Continuous, Count}} + +julia> guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3])) +Unknown +``` +""" +guess_observation_scitype(y) = guess_observation_scitype(y, Val(Tables.istable(y))) +guess_observation_scitype(y, ::Any) = Unknown +guess_observation_scitype(y::AbstractArray, ::Val{false}) = observation(scitype(y)) +function guess_observation_scitype(table, ::Val{true}) + row = Tables.subset(table, 1, viewhint=false) |> collect + E = eltype(row) + nonmissingtype(E) == E || return Unknown + scitype(row) +end + +""" + guess_model_target_observation_scitype(model) + +*Private method.* + +Try to infer a least upper bound on the scitype of target observations acceptable to +`model`, by inspecting `target_scitype(model)`. Return `Unknown` if unable to draw a reliable +inference. + +The observation scitype for a table is here understood as the scitype of a row converted +to a vector.
+ +""" +guess_model_target_observation_scitype(model) = observation(target_scitype(model)) diff --git a/test/_models/simple_composite_model.jl b/test/_models/simple_composite_model.jl index 0ff413cb..09951d49 100644 --- a/test/_models/simple_composite_model.jl +++ b/test/_models/simple_composite_model.jl @@ -1,47 +1,38 @@ -export SimpleDeterministicCompositeModel, SimpleDeterministicNetworkCompositeModel, - SimpleProbabilisticCompositeModel, SimpleProbabilisticNetworkCompositeModel +export SimpleDeterministicNetworkCompositeModel, + SimpleProbabilisticNetworkCompositeModel using MLJBase const COMPOSITE_MODELS = [ - :SimpleDeterministicCompositeModel, - :SimpleProbabilisticCompositeModel, :SimpleDeterministicNetworkCompositeModel, :SimpleProbabilisticNetworkCompositeModel ] const REGRESSORS = Dict( - :SimpleDeterministicCompositeModel => :DeterministicConstantRegressor, :SimpleDeterministicNetworkCompositeModel => :DeterministicConstantRegressor, - :SimpleProbabilisticCompositeModel => :ConstantRegressor, :SimpleProbabilisticNetworkCompositeModel => :ConstantRegressor, ) const REGRESSOR_SUPERTYPES = Dict( - :SimpleDeterministicCompositeModel => :Deterministic, :SimpleDeterministicNetworkCompositeModel => :Deterministic, - :SimpleProbabilisticCompositeModel => :Probabilistic, :SimpleProbabilisticNetworkCompositeModel => :Probabilistic, ) const COMPOSITE_SUPERTYPES = Dict( - :SimpleDeterministicCompositeModel => :DeterministicComposite, :SimpleDeterministicNetworkCompositeModel => :DeterministicNetworkComposite, - :SimpleProbabilisticCompositeModel => :ProbabilisticComposite, :SimpleProbabilisticNetworkCompositeModel => :ProbabilisticNetworkComposite, ) - for model in COMPOSITE_MODELS regressor = REGRESSORS[model] regressor_supertype = REGRESSOR_SUPERTYPES[model] composite_supertype = COMPOSITE_SUPERTYPES[model] - quote + quote """ (model)(; regressor=$($(regressor))(), transformer=FeatureSelector()) Construct a composite model consisting of a transformer - 
(`Unsupervised` model) followed by a `$($(regressor_supertype))` model. Mainly - intended for internal testing . + (`Unsupervised` model) followed by a `$($(regressor_supertype))` model. + Intended for internal testing only. """ mutable struct $(model){ @@ -67,36 +58,18 @@ for model in COMPOSITE_MODELS is_pure_julia = true, is_wrapper = true ) - + MLJBase.input_scitype(::Type{<:$(model){L,T}}) where {L,T} = MLJBase.input_scitype(T) MLJBase.target_scitype(::Type{<:$(model){L,T}}) where {L,T} = MLJBase.target_scitype(L) - + end |> eval end ## FIT METHODS -for model in COMPOSITE_MODELS[1:2] - @eval function MLJBase.fit( - composite::$(model), verbosity::Integer, Xtrain, ytrain - ) - X = source(Xtrain) # instantiates a source node - y = source(ytrain) - - t = machine(composite.transformer, X) - Xt = transform(t, X) - l = machine(composite.model, Xt, y) - yhat = predict(l, Xt) - - mach = machine($(REGRESSOR_SUPERTYPES[model])(), X, y; predict=yhat) - - return!(mach, composite, verbosity) - end -end - -for model in COMPOSITE_MODELS[3:4] +for model in COMPOSITE_MODELS @eval function MLJBase.prefit( composite::$(model), verbosity::Integer, diff --git a/test/composition/learning_networks/deprecated_machines.jl b/test/composition/learning_networks/deprecated_machines.jl deleted file mode 100644 index 19b580d6..00000000 --- a/test/composition/learning_networks/deprecated_machines.jl +++ /dev/null @@ -1,167 +0,0 @@ -module TestLearningNetworkMachines - -const depwarn=false - -using Test -using ..Models -using ..TestUtilities -using MLJBase -using Tables -using StableRNGs -using Serialization -rng = StableRNG(616161) - -# A dummy clustering model: -mutable struct DummyClusterer <: Unsupervised - n::Int -end -DummyClusterer(; n=3) = DummyClusterer(n) -function MLJBase.fit(model::DummyClusterer, verbosity::Int, X) - Xmatrix = Tables.matrix(X) - n = min(size(Xmatrix, 2), model.n) - centres = Xmatrix[1:n, :] - levels = categorical(1:n) - report = (centres=centres,) - fitresult = 
levels - return fitresult, nothing, report -end -MLJBase.transform(model::DummyClusterer, fitresult, Xnew) = - selectcols(Xnew, 1:length(fitresult)) -MLJBase.predict(model::DummyClusterer, fitresult, Xnew) = - [fill(fitresult[1], nrows(Xnew))...] - - -N = 20 -X = (a = rand(N), b = categorical(rand("FM", N))) - -@testset "signature helpers" begin - @test MLJBase._call(NamedTuple()) == NamedTuple() - a = source(:a) - b = source(:b) - W = source(:W) - yhat = source(:yhat) - s = (transform=W, - report=(a=a, b=b), - predict=yhat) - @test MLJBase._report_part(s) == (a=a, b=b) - @test MLJBase._operation_part(s) == (transform=W, predict=yhat) - @test MLJBase._nodes(s) == (W, yhat, a, b) - @test MLJBase._operations(s) == (:transform, :predict) - R = MLJBase._call(MLJBase._report_part(s)) - @test R.a == :a - @test R.b == :b -end - -@testset "wrapping a learning network in a machine" begin - - # unsupervised: - Xs = source(X) - W = transform(machine(OneHotEncoder(), Xs), Xs) - clust = DummyClusterer(n=2) - m = machine(clust, W) - yhat = predict(m, W) - Wout = transform(m, W) - rnode = source(:stuff) - - # test of `fitted_params(::NamedTuple)': - fit!(Wout, verbosity=0) - - @test_throws(MLJBase.ERR_BAD_SIGNATURE, - machine(Unsupervised(); - predict=yhat, - fitted_params=rnode, - depwarn) - ) - @test_throws(MLJBase.ERR_EXPECTED_NODE_IN_SIGNATURE, - machine(Unsupervised(); - predict=42, - depwarn) - ) - @test_throws(MLJBase.ERR_EXPECTED_NODE_IN_SIGNATURE, - machine(Unsupervised(), Xs; - predict=yhat, - transform=Wout, - report=(some_stuff=42,), - depwarn) - ) - mach = machine(Unsupervised(), Xs; - predict=yhat, - transform=Wout, - report=(some_stuff=rnode,), - depwarn) - @test mach.args == (Xs, ) - @test mach.args[1] == Xs - fit!(mach, force=true, verbosity=0) - Θ = mach.fitresult - @test Θ.predict == yhat - @test Θ.transform == Wout - Θ.report.some_stuff == rnode - @test report(mach).some_stuff == :stuff - @test report(mach).machines == fitted_params(mach).machines - - # 
supervised - y = rand("ab", N) |> categorical; - ys = source(y) - mm = machine(ConstantClassifier(), W, ys) - yhat = predict(mm, W) - e = @node auc(yhat, ys) - - @test_throws Exception machine(; predict=yhat, depwarn) - mach = machine(Probabilistic(), Xs, ys; - predict=yhat, - report=(training_auc=e,), - depwarn) - @test mach.model isa Probabilistic - @test_throws ArgumentError machine(Probabilistic(), Xs, ys; depwarn) - @test_throws ArgumentError machine(Probabilistic(), Xs, ys; - report=(training_auc=e,), - depwarn) - - # test extra report items coming from `training_auc=e` above - fit!(mach, verbosity=0) - err = auc(yhat(), y) - @test report(mach).training_auc ≈ err - - # supervised - predict_mode - @test predict_mode(mach, X) == mode.(predict(mach, X)) - predict_mode(mach, rows=1:2) == predict_mode(mach, rows=:)[1:2] - - # evaluate a learning machine - evaluate!(mach, measure=LogLoss(), verbosity=0) - - # supervised - predict_median, predict_mean - X1, y1 = make_regression(20) - - Xs = source(X1); ys = source(y1) - mm = machine(ConstantRegressor(), Xs, ys) - yhat = predict(mm, Xs) - mach = fit!(machine(Probabilistic(), Xs, ys; predict=yhat, depwarn), verbosity=0) - @test predict_mean(mach, X1) ≈ mean.(predict(mach, X1)) - @test predict_median(mach, X1) ≈ median.(predict(mach, X1)) - -end - -mutable struct DummyComposite <: DeterministicComposite - stand1 - stand2 -end - -@testset "issue 377" begin - stand = Standardizer() - model = DummyComposite(stand, stand) - - Xs = source() - mach1 = machine(model.stand1, Xs) - X1 = transform(mach1, Xs) - mach2 = machine(model.stand2, X1) - X2 = transform(mach2, X1) - - mach = machine(Unsupervised(), Xs; transform=X2, depwarn) - @test_logs((:error, r"The hyper"), - @test_throws(ArgumentError, - MLJBase.network_model_names(model, mach))) -end - -end - -true diff --git a/test/composition/learning_networks/nodes.jl b/test/composition/learning_networks/nodes.jl index 1f175d45..e79cec9d 100644 --- 
a/test/composition/learning_networks/nodes.jl +++ b/test/composition/learning_networks/nodes.jl @@ -6,6 +6,7 @@ using MLJBase using ..Models using ..TestUtilities using CategoricalArrays +using StatisticalMeasures import Random.seed! seed!(1234) diff --git a/test/composition/learning_networks/replace.jl b/test/composition/learning_networks/replace.jl index fab3f16c..6186bf9c 100644 --- a/test/composition/learning_networks/replace.jl +++ b/test/composition/learning_networks/replace.jl @@ -33,8 +33,6 @@ zhat = inverse_transform(standM, uhat) yhat = exp(zhat) enode = @node mae(ys, yhat) -_header(accel) = - @testset "replace() method; $(typeof(accel))" for accel in (CPU1(), CPUThreads()) fit!(yhat, verbosity=0, acceleration=accel) @@ -50,15 +48,12 @@ _header(accel) = knn2 = deepcopy(knn) # duplicate the network with `yhat` as glb: - yhat_clone = @test_logs( - (:warn, r"No replacement"), - replace( - yhat, - hot=>hot2, - knn=>knn2, - ys=>source(42); - copy_models_deeply=false, - ), + yhat_clone = replace( + yhat, + hot=>hot2, + knn=>knn2, + ys=>source(42); + copy_unspecified_deeply=false, ) # test models and sources duplicated correctly: @@ -79,16 +74,13 @@ _header(accel) = @test all(isempty, sources(yhat_ser)) # duplicate a signature: - signature = (predict=yhat, report=(mae=enode,)) |> MLJBase.signature - signature_clone = @test_logs( - (:warn, r"No replacement"), - replace( - signature, - hot=>hot2, - knn=>knn2, - ys=>source(42); - copy_models_deeply=false, - ) + signature = (predict=yhat, report=(mae=enode,)) |> MLJBase.Signature + signature_clone = replace( + signature, + hot=>hot2, + knn=>knn2, + ys=>source(2*y); + copy_unspecified_deeply=false, ) glb_node = glb(signature_clone) models_clone = MLJBase.models(glb_node) @@ -97,28 +89,20 @@ _header(accel) = @test models_clone[3] === hot2 sources_clone = sources(glb_node) @test sources_clone[1]() == X - @test sources_clone[2]() === 42 + @test sources_clone[2]() == 2*y + + # warning thrown + @test_logs( + (:warn, r"No 
replacement"), + replace( + signature, + hot=>hot2, + knn=>knn2, + ys=>source(2*y); + ), + ) - # duplicate a learning network machine: - mach = machine(Deterministic(), Xs, ys; - predict=yhat, - report=(mae=enode,)) - mach2 = replace(mach, hot=>hot2, knn=>knn2, - ys=>source(ys.data); - empty_unspecified_sources=true) - ss = sources(glb(mach2)) - @test isempty(ss[1]) - mach2 = @test_logs((:warn, r"No replacement"), - replace(mach, hot=>hot2, knn=>knn2, - ys=>source(ys.data))) - yhat2 = mach2.fitresult.predict - fit!(mach, verbosity=0) - fit!(mach2, verbosity=0) - @test predict(mach, X) ≈ predict(mach2, X) - @test report(mach).mae ≈ report(mach2).mae - - @test mach2.args[1]() == Xs() - @test mach2.args[2]() == ys() + yhat2 = MLJBase.operation_nodes(signature_clone).predict ## EXTRA TESTS FOR TRAINING SEQUENCE @@ -141,9 +125,7 @@ _header(accel) = @test length(MLJBase.machines(yhat)) == length(MLJBase.machines(yhat2)) @test MLJBase.models(yhat) == MLJBase.models(yhat2) - @test sources(yhat) == sources(yhat2) - @test MLJBase.tree(yhat) == MLJBase.tree(yhat2) - @test yhat() ≈ yhat2() + @test 2yhat() ≈ yhat2() # this change should trigger retraining of all machines except the # univariate standardizer: @@ -159,7 +141,6 @@ _header(accel) = (:train, oakM2), (:train, knnM2)]) end - end # module true diff --git a/test/composition/learning_networks/signatures.jl b/test/composition/learning_networks/signatures.jl index 08785b40..019a9cd5 100644 --- a/test/composition/learning_networks/signatures.jl +++ b/test/composition/learning_networks/signatures.jl @@ -7,6 +7,7 @@ using Tables using Test using MLJModelInterface using OrderedCollections +using StatisticalMeasures @testset "signatures - accessor functions" begin a = source(:a) diff --git a/test/composition/models/deprecated_from_network.jl b/test/composition/models/deprecated_from_network.jl deleted file mode 100644 index 15b56d03..00000000 --- a/test/composition/models/deprecated_from_network.jl +++ /dev/null @@ -1,621 +0,0 
@@ -module TestFromComposite - -using Test -using Tables -using MLJBase -using ..Models -using ..TestUtilities -using CategoricalArrays -using StableRNGs -using Parameters -rng = StableRNG(616161) - -ridge_model = FooBarRegressor(lambda=0.1) -selector_model = FeatureSelector() - -import MLJBase.@nodepwarn_from_network -const depwarn = false - -## FROM_NETWORK_PREPROCESS - -# supervised: -Xs = source(nothing) -ys = source(nothing) -z = log(ys) -stand = UnivariateStandardizer() -standM = machine(stand, z) -u = transform(standM, z) -hot = OneHotEncoder() -hotM = machine(hot, Xs) -W = transform(hotM, Xs) -knn = KNNRegressor() -knnM = machine(knn, W, u) -oak = DecisionTreeRegressor() -oakM = machine(oak, W, u) -uhat = 0.5*(predict(knnM, W) + predict(oakM, W)) -zhat = inverse_transform(standM, uhat) -yhat = exp(zhat) - -mach_ex = :(machine(Deterministic(), Xs, ys; predict=yhat, depwarn=false)) - -## TESTING `from_network_preprocess` - -ex = Meta.parse( - "begin - mutable struct CompositeX - knn_rgs=knn - one_hot_enc=hot - end - target_scitype=AbstractVector{<:Continuous} - input_scitype=Table(Continuous,Multiclass) - end") -mach_, modeltype_ex, struct_ex, no_fields, dic = - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex) - -eval(Parameters.with_kw(struct_ex, TestFromComposite, false)) -@test supertype(CompositeX) == DeterministicComposite -composite = CompositeX() -@test composite.knn_rgs == knn -@test composite.one_hot_enc == hot -@test dic[:target_scitype] == :(AbstractVector{<:Continuous}) -@test dic[:input_scitype] == :(Table(Continuous, Multiclass)) - -ex = Meta.parse( - "begin - mutable struct Composite4 <: ProbabilisticComposite - knn_rgs=knn - one_hot_enc=hot - end - end") -mach_, modeltype_ex, struct_ex = - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex) -eval(Parameters.with_kw(struct_ex, TestFromComposite, false)) -@test supertype(Composite4) == ProbabilisticComposite - -ex = Meta.parse( - "mutable struct Composite2 - 
knn_rgs=knn - one_hot_enc=hot - end") -mach_, modeltype_ex, struct_ex, no_fields, dic = - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex) -eval(Parameters.with_kw(struct_ex, TestFromComposite, false)) -composite = Composite2() -@test composite.knn_rgs == knn -@test composite.one_hot_enc == hot - -ex = Meta.parse( - "begin - mutable struct Composite6 <: Probabilistic - knn_rgs=knn - one_hot_enc=hot - end - end") -@test_logs((:warn, r"New composite"), - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)) - -ex = Meta.parse( - "begin - mutable struct Composite20 - knn_rgs=knn - one_hot_enc=hot - end - target_scitype == Continuous - end") -@test_throws(ArgumentError, - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)) - -ex = Meta.parse( - "begin - mutable struct Composite20 - knn_rgs=knn - one_hot_enc=hot - end - Continuous - end") -@test_throws(ArgumentError, - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)) - -ex = Meta.parse( - "begin - mutable struct Composite20 - knn_rgs=knn - one_hot_enc=hot - end - 43 = Continuous - end") -@test_throws(ArgumentError, - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)) - -ex = Meta.parse( - "begin - mutable struct Composite7 < Probabilistic - knn_rgs=knn - one_hot_enc=hot - end - end") -@test_throws(ArgumentError, - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)) - -@test_throws(ArgumentError, - MLJBase.from_network_preprocess(TestFromComposite, knn, ex)) - -ex = Meta.parse( - "begin - Composite3( - knn_rgs=knn, - one_hot_enc=hot) - end") -@test_throws(ArgumentError, - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)) - -ex = Meta.parse( - "begin - mutable struct Composite8 - knn_rgs::KNNRegressor=knn - one_hot_enc=hot - end - end") -mach_, modeltype_ex, struct_ex = - MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex) -eval(Parameters.with_kw(struct_ex, TestFromComposite, false)) -VERSION ≥ v"1.3.0-" && - 
@test fieldtypes(Composite8) == (KNNRegressor, Any) - -# test that you cannot leave "default" component models unspecified: -modeltype_ex = :Composite9 -struct_ex = :(mutable struct Composite9 <: DeterministicComposite - knn_rgs::KNNRegressor - one_hot_enc = hot - end) -@test_logs (:error, r"Problem instantiating") begin - @test_throws Exception begin - MLJBase.from_network_(TestFromComposite, - mach_ex, modeltype_ex, - struct_ex, false, Dict{Symbol,Any}()) - end -end - - -## TEST MACRO-EXPORTED NETWORKS -# (CANNOT WRAP IN @testset) - -# some actual data: -N = 10 -X = MLJBase.table(rand(N, 3)) -y = rand(N) -w = rand(N) - -# supervised with sample weights: -ws = source() -knnM = machine(knn, W, u, ws) -uhat = 0.5*(predict(knnM, W) + predict(oakM, W)) -zhat = inverse_transform(standM, uhat) -yhat = exp(zhat) - -@nodepwarn_from_network machine( - Deterministic(), Xs, ys, ws; predict=yhat, depwarn=false -) begin - mutable struct CompositeX1 - knn_rgs=knn - one_hot_enc=hot - end - supports_weights = true - target_scitype = AbstractVector{<:Continuous} -end -model = CompositeX1() -@test supports_weights(model) -@test target_scitype(model) == AbstractVector{<:Continuous} -@test_logs((:warn, r""), predict(fit!(machine(model, X, y, w), verbosity=-1), X)); -# unsupervised: -@nodepwarn_from_network machine(Unsupervised(), Xs; transform=W, depwarn=false) begin - mutable struct CompositeX2 - one_hot_enc=hot - end -end -model = CompositeX2() -@test_logs((:warn, r""), transform(fit!(machine(model, X), verbosity=-1), X)); - - -# second supervised test: -fea = FeatureSelector() -feaM = machine(fea, Xs) -G = transform(feaM, Xs) -hotM = machine(hot, G) -H = transform(hotM, G) -elm = DecisionTreeClassifier() -elmM = machine(elm, H, ys) -yhat = predict(elmM, H) - -@nodepwarn_from_network machine(Probabilistic(), Xs, ys; predict=yhat, depwarn=false) begin - mutable struct CompositeX3 - selector=fea - one_hot=hot - tree=elm - end -end -model = CompositeX3() -y = coerce(y, Multiclass) 
-@test @test_logs((:warn, r""), predict(fit!(machine(model, X, y), verbosity=-1), X)) isa - AbstractVector{<:UnivariateFinite} - -# yet more examples: -x1 = map(n -> mod(n,3), rand(rng, UInt8, 100)) |> categorical; -x2 = randn(rng, 100); -X = (x1=x1, x2=x2); -y = x2.^2; - -Xs = source(X) -ys = source(y) -z = log(ys) -stand = UnivariateStandardizer() -standM = machine(stand, z) -u = transform(standM, z) -hot = OneHotEncoder() -hotM = machine(hot, Xs) -W = transform(hotM, Xs) -knn = KNNRegressor() -knnM = machine(knn, W, u) -oak = DecisionTreeRegressor() -oakM = machine(oak, W, u) -uhat = 0.5*(predict(knnM, W) + predict(oakM, W)) -zhat = inverse_transform(standM, uhat) -yhat = exp(zhat) - -mach = machine(Deterministic(), Xs, ys; predict=yhat, depwarn=false) - -@nodepwarn_from_network mach begin - mutable struct Composite10 - knn_rgs::KNNRegressor=knn - one_hot_enc=hot - end -end - -model_ = Composite10() - -mach = machine(model_, X, y) - -@test_logs((:warn, r""), - @test_model_sequence(fit_only!(mach), - [(:train, model_), (:train, stand), (:train, hot), - (:train, knn), (:train, oak)], - [(:train, model_), (:train, hot), (:train, stand), - (:train, knn), (:train, oak)], - [(:train, model_), (:train, stand), (:train, hot), - (:train, oak), (:train, knn)], - [(:train, model_), (:train, hot), (:train, stand), - (:train, oak), (:train, knn)]) - ) - -model_.knn_rgs.K = 55 -knn = model_.knn_rgs -@test_model_sequence(fit_only!(mach), - [(:update, model_), (:skip, stand), (:skip, hot), - (:update, knn), (:skip, oak)], - [(:update, model_), (:skip, hot), (:skip, stand), - (:update, knn), (:skip, oak)], - [(:update, model_), (:skip, stand), (:skip, hot), - (:skip, oak), (:update, knn)], - [(:update, model_), (:skip, hot), (:skip, stand), - (:skip, oak), (:update, knn)]) - - -@test MLJBase.tree(mach.fitresult.predict).arg1.arg1.arg1.arg1.model.K == 55 - -multistand = Standardizer() -multistandM = machine(multistand, W) -W2 = transform(multistandM, W) - -mach = 
machine(Unsupervised(), Xs; transform=W2, depwarn=false) - -@nodepwarn_from_network mach begin - mutable struct MyTransformer - one_hot=hot - end -end - -model_ = MyTransformer() - -mach = machine(model_, X) -@test_logs((:warn, r""), - @test_model_sequence fit_only!(mach) [(:train, model_), - (:train, hot), (:train, multistand)] - ) -model_.one_hot.drop_last=true -hot = model_.one_hot -@test_model_sequence fit_only!(mach) [(:update, model_), - (:update, hot), (:train, multistand)] - -# check nested fitted_params: -FP = MLJBase.fitted_params(mach) -@test keys(FP) == (:one_hot, :machines, :fitted_params_given_machine) -@test Set(FP.one_hot.fitresult.all_features) == Set(keys(X)) - -transform(mach, X); - - -## TEST MACRO-EXPORTED SUPERVISED NETWORK WITH SAMPLE WEIGHTS - -rng = StableRNG(56161) -N = 500 -X = (x = rand(rng, 3N), ); -y = categorical(rand(rng, "abc", 3N)); -# define class weights :a, :b, :c in ration 2:4:1 -w = map(y) do η - if η == 'a' - return 2 - elseif η == 'b' - return 4 - else - return 1 - end -end; -Xs = source(X) -ys = source(y) -ws = source(w) - -standM = machine(Standardizer(), Xs) -W = transform(standM, Xs) - -rgs = ConstantClassifier() # supports weights -rgsM = machine(rgs, W, ys, ws) -yhat = predict(rgsM, W) - -fit!(yhat, verbosity=0) -fit!(yhat, rows=1:div(N,2), verbosity=0) -yhat(rows=1:div(N,2)); - -mach = machine(Probabilistic(), Xs, ys, ws; predict=yhat, depwarn=false) - -@nodepwarn_from_network mach begin - mutable struct MyComposite - regressor=rgs - end - supports_weights=true -end - -my_composite = MyComposite() -@test MLJBase.supports_weights(my_composite) -mach = @test_logs((:warn, r""), fit!(machine(my_composite, X, y), verbosity=0)) -Xnew = selectrows(X, 1:div(N,2)) -predict(mach, Xnew)[1] -posterior = predict(mach, Xnew)[1] - -# "posterior" is roughly uniform: -@test abs(pdf(posterior, 'b')/(pdf(posterior, 'a')) - 1) < 0.15 -@test abs(pdf(posterior, 'b')/(pdf(posterior, 'c')) - 1) < 0.15 - -# now add weights: -mach = 
@test_logs((:warn, r""), - fit!(machine(my_composite, X, y, w), rows=1:div(N,2), verbosity=0) - ) -posterior = predict(mach, Xnew)[1] - -# "posterior" is skewed appropriately in weighted case: -@test abs(pdf(posterior, 'b')/(2*pdf(posterior, 'a')) - 1) < 0.15 -@test abs(pdf(posterior, 'b')/(4*pdf(posterior, 'c')) - 1) < 0.19 - -# composite with no fields: -mach = machine(Probabilistic(), Xs, ys, ws; predict=yhat, depwarn=false) -@nodepwarn_from_network mach begin - struct CompositeWithNoFields - end -end -composite_with_no_fields = CompositeWithNoFields() -mach = @test_logs((:warn, r""), fit!(machine(composite_with_no_fields, X, y), verbosity=0)) - - -## EXPORTING A TRANSFORMER WITH PREDICT AND TRANSFORM - -# A dummy clustering model: -mutable struct DummyClusterer <: Unsupervised - n::Int -end -DummyClusterer(; n=3) = DummyClusterer(n) -function MLJBase.fit(model::DummyClusterer, verbosity::Int, X) - Xmatrix = Tables.matrix(X) - n = min(size(Xmatrix, 2), model.n) - centres = Xmatrix[1:n, :] - levels = categorical(1:n) - report = (centres=centres,) - fitresult = levels - return fitresult, nothing, report -end -MLJBase.transform(model::DummyClusterer, fitresult, Xnew) = - selectcols(Xnew, 1:length(fitresult)) -MLJBase.predict(model::DummyClusterer, fitresult, Xnew) = - [fill(fitresult[1], nrows(Xnew))...] 
- -N = 20 -X = (a = rand(N), b = categorical(rand("FM", N))) - -Xs = source(X) -W = transform(machine(OneHotEncoder(), Xs), Xs) -clust = DummyClusterer(n=2) -m = machine(clust, W) -yhat = predict(m, W) -Wout = transform(m, W) -foo = first(yhat) -mach = machine(Unsupervised(), Xs; - predict=yhat, - transform=Wout, - report=(foo=foo,), - depwarn=false) - -@nodepwarn_from_network mach begin - mutable struct WrappedClusterer - clusterer::Unsupervised = clust - end - input_scitype = Table(Continuous,Multiclass) -end - -model = WrappedClusterer() -mach = @test_logs((:warn, r""), fit!(machine(model, X), verbosity=0)) -fit!(yhat, verbosity=0) -@test predict(mach, X) == yhat() -@test transform(mach, X).a ≈ Wout().a -rep = report(mach) -@test rep.foo == yhat() |> first - - -## EXPORTING A STATIC LEARNING NETWORK (NO TRAINING ARGUMENTS) - -age = [23, 45, 34, 25, 67] -X = (age = age, - gender = categorical(['m', 'm', 'f', 'm', 'f'])) - -struct MyStaticTransformer <: Static - ftr::Symbol -end - -MLJBase.transform(transf::MyStaticTransformer, verbosity, X) = - selectcols(X, transf.ftr) - -Xs = source() -W = transform(machine(MyStaticTransformer(:age)), Xs) -Z = 2*W - -@nodepwarn_from_network machine(Static(), Xs; transform=Z, depwarn=false) begin - struct NoTraining - end -end - -mach = @test_logs((:warn, r""), fit!(machine(NoTraining()), verbosity=0)) -@test transform(mach, X) == 2*X.age - - -## TESTINGS A STACK AND IN PARTICULAR FITTED_PARAMS - -folds(data, nfolds) = - partition(1:nrows(data), (1/nfolds for i in 1:(nfolds-1))...); - -model1 = RidgeRegressor() -model2 = KNNRegressor(K=1) -judge = KNNRegressor(K=1) - -X = source() -y = source() - -folds(X::AbstractNode, nfolds) = node(XX->folds(XX, nfolds), X) -MLJBase.restrict(X::AbstractNode, f::AbstractNode, i) = - node((XX, ff) -> restrict(XX, ff, i), X, f); -MLJBase.corestrict(X::AbstractNode, f::AbstractNode, i) = - node((XX, ff) -> corestrict(XX, ff, i), X, f); - -f = folds(X, 3) - -m11 = machine(model1, corestrict(X, f, 
1), corestrict(y, f, 1)) -m12 = machine(model1, corestrict(X, f, 2), corestrict(y, f, 2)) -m13 = machine(model1, corestrict(X, f, 3), corestrict(y, f, 3)) - -y11 = predict(m11, restrict(X, f, 1)); -y12 = predict(m12, restrict(X, f, 2)); -y13 = predict(m13, restrict(X, f, 3)); - -m21 = machine(model2, corestrict(X, f, 1), corestrict(y, f, 1)) -m22 = machine(model2, corestrict(X, f, 2), corestrict(y, f, 2)) -m23 = machine(model2, corestrict(X, f, 3), corestrict(y, f, 3)) - -y21 = predict(m21, restrict(X, f, 1)); -y22 = predict(m22, restrict(X, f, 2)); -y23 = predict(m23, restrict(X, f, 3)); - -y1_oos = vcat(y11, y12, y13); -y2_oos = vcat(y21, y22, y23); - -X_oos = MLJBase.table(hcat(y1_oos, y2_oos)) - -m_judge = machine(judge, X_oos, y) - -m1 = machine(model1, X, y) -m2 = machine(model2, X, y) - -y1 = predict(m1, X); -y2 = predict(m2, X); - -X_judge = MLJBase.table(hcat(y1, y2)) -yhat = predict(m_judge, X_judge) - -@nodepwarn_from_network machine(Deterministic(), X, y; predict=yhat, depwarn=false) begin - mutable struct MyStack - regressor1=model1 - regressor2=model2 - judge=judge - end -end - -my_stack = MyStack() -X, y = make_regression(18, 2) -mach = machine(my_stack, X, y) -@test_logs((:warn, r""), fit!(mach, verbosity=0)) - -fp = fitted_params(mach) -@test keys(fp.judge) == (:tree,) -@test length(fp.regressor1) == 4 -@test length(fp.regressor2) == 4 -@test keys(fp.regressor1[1]) == (:coefficients, :intercept) -@test keys(fp.regressor2[1]) == (:tree,) - - -## ISSUE #377 - -stand1 = Standardizer() -stand2 = Standardizer() - -Xraw = (x=[-2.0, 0.0, 2.0],) -X = source(Xraw) - -mach1 = machine(stand1, X) -X2 = transform(mach1, X) - -mach2 = machine(stand2, X2) -X3 = transform(mach2, X2) - -@nodepwarn_from_network machine(Unsupervised(), X; transform=X3, depwarn=false) begin - mutable struct CompositeZ - s1=stand1 - s2=stand2 - end -end - -# check no problems with network: -fit!(X3) -@test X3().x ≈ [-1.0, 0.0, 1.0] - -# instantiate with identical (===) models in two 
places: -model = CompositeZ(s1=stand1, s2=stand1) -mach = machine(model, Xraw) -@test_logs((:warn, MLJBase.WARN_NETWORK_MACHINES_DEPRECATION), - (:error, MLJBase.logerr_identical_models([:s1, :s2], model)), - (:error, r"Problem"), - (:info, r"Running"), - (:info, r"Type checks okay"), - @test_throws(MLJBase.ERR_IDENTICAL_MODELS, - fit!(mach, verbosity=-1))) - - -## SOURCE NODES THAT ARE ALSO OPERATION NODES - -stand = Standardizer() - -Xs = source() -mach1 = machine(stand, Xs) -X2 = transform(mach1, Xs) - -network_mach = machine(Unsupervised(), Xs, transform=X2, inverse_transform=Xs, depwarn=false) - -@nodepwarn_from_network network_mach begin - struct AppleComposite - standardizer = stand - end -end - -X = (x = Float64[1, 2, 3],) -mach = machine(AppleComposite(), X) -@test_logs((:warn, r""), fit!(mach, verbosity=0, force=true)) -@test transform(mach, X).x ≈ Float64[-1, 0, 1] -@test inverse_transform(mach, X) == X - -end - -true diff --git a/test/composition/models/deprecated_methods.jl b/test/composition/models/deprecated_methods.jl deleted file mode 100644 index 4cd7c907..00000000 --- a/test/composition/models/deprecated_methods.jl +++ /dev/null @@ -1,459 +0,0 @@ -module TestCompositesCore - -using Test -using MLJBase -using Tables -import MLJBase -using ..Models -using ..TestUtilities -using CategoricalArrays -using OrderedCollections -import Random.seed! 
-seed!(1234) - -const depwarn=false - -mutable struct Rubbish <: DeterministicComposite - model_in_network - model_not_in_network - some_other_variable -end - -knn = KNNRegressor() -model = Rubbish(knn, OneHotEncoder(), 42) -X, y = make_regression(10, 2) - -@testset "logic for composite model update - fallback()" begin - Xs = source(X) - ys = source(y) - mach0 = machine(Standardizer(), Xs) - W = transform(mach0, Xs) - mach1 = machine(model.model_in_network, W, ys) - yhat = predict(mach1, W) - mach = machine(Deterministic(), Xs, ys; predict=yhat, depwarn) - fitresult, cache, _ = return!(mach, model, 0; depwarn) - network_model_names = getfield(fitresult, :network_model_names) - @test network_model_names == [:model_in_network, nothing] - old_model = cache.old_model - glb_node = MLJBase.glb(mach) - @test !MLJBase.fallback(model, old_model, network_model_names, glb_node) - - # don't fallback if mutating field for a network model: - model.model_in_network.K = 24 - @test !MLJBase.fallback(model, old_model, network_model_names, glb_node) - - # do fallback if replacing field for a network model: - model.model_in_network = KNNRegressor() - @test MLJBase.fallback(model, old_model, network_model_names, glb_node) - - # return to original state: - model.model_in_network = knn - @test !MLJBase.fallback(model, old_model, network_model_names, glb_node) - - # do fallback if a non-network field changes: - model.model_not_in_network.features = [:x1,] - @test MLJBase.fallback(model, old_model, network_model_names, glb_node) - - # return to original state: - model.model_not_in_network = OneHotEncoder() - @test !MLJBase.fallback(model, old_model, network_model_names, glb_node) - - # do fallback if any non-model changes: - model.some_other_variable = 123412 - @test MLJBase.fallback(model, old_model, network_model_names, glb_node) - -end - -model = Rubbish(KNNRegressor(), Standardizer(), 42) - -function MLJBase.fit(model::Rubbish, verbosity, X, y) - Xs = source(X) - ys = source(y) - mach1 
= machine(model.model_in_network, Xs, ys) - yhat = predict(mach1, Xs) - mach = machine(Deterministic(), Xs, ys; predict=yhat) - return!(mach, model, verbosity; depwarn) -end - -# `model` is instance of `Rubbish` -mach = fit!(machine(model, X, y), verbosity=0) - -@testset "logic for composite model update - fit!" begin - - # immediately refit: - @test_model_sequence(fit!(mach), [(:skip, model), ]) - - # mutate a field for a network model: - model.model_in_network.K = 24 - @test_model_sequence(fit!(mach), - [(:update, model), (:update, model.model_in_network)]) - - # immediately refit: - @test_model_sequence(fit!(mach), [(:skip, model), ]) - - # replace a field for a network model: - model.model_in_network = KNNRegressor() - @test_model_sequence(fit!(mach), - [(:update, model), (:train, model.model_in_network)]) - - # immediately refit: - @test_model_sequence(fit!(mach), [(:skip, model), ]) - - # mutate a field for a model not in network: - model.model_not_in_network.features = [:x1,] - @test_model_sequence(fit!(mach), - [(:update, model), (:train, model.model_in_network)]) - - # immediately refit: - @test_model_sequence(fit!(mach), [(:skip, model), ]) - - # mutate some field that is not a model: - model.some_other_variable = 123412 - @test_model_sequence(fit!(mach), - [(:update, model), (:train, model.model_in_network)]) -end - -N = 50 -Xin = (a=rand(N), b=rand(N), c=rand(N)); -yin = rand(N); - -train, test = partition(eachindex(yin), 0.7); -Xtrain = MLJBase.selectrows(Xin, train); -ytrain = yin[train]; - -ridge_model = FooBarRegressor(lambda=0.1) -selector_model = FeatureSelector() - -mutable struct WrappedRidge <: DeterministicComposite - ridge -end - -# julia bug? If I return the following test to a @testset block, then -# the test marked with ******* fails (bizarre!) 
-#@testset "second test of hand-exported network" begin -function MLJBase.fit(model::WrappedRidge, verbosity::Integer, X, y) - Xs = source(X) - ys = source(y) - - stand = Standardizer() - standM = machine(stand, Xs) - W = transform(standM, Xs) - - boxcox = UnivariateBoxCoxTransformer() - boxcoxM = machine(boxcox, ys) - z = transform(boxcoxM, ys) - - ridgeM = machine(model.ridge, W, z) - zhat = predict(ridgeM, W) - yhat = inverse_transform(boxcoxM, zhat) - - mach = machine(Deterministic(), Xs, ys; predict=yhat) - return!(mach, model, verbosity; depwarn) -end - -MLJBase.input_scitype(::Type{<:WrappedRidge}) = - Table(Continuous) -MLJBase.target_scitype(::Type{<:WrappedRidge}) = - AbstractVector{<:Continuous} - -ridge = FooBarRegressor(lambda=0.1) -model_ = WrappedRidge(ridge) -mach = machine(model_, Xin, yin) -id = objectid(mach) -fit!(mach, verbosity=0) -@test objectid(mach) == id # ********* -yhat=predict(mach, Xin); -ridge.lambda = 1.0 -fit!(mach, verbosity=0) -@test predict(mach, Xin) != yhat - -#end - -# A dummy clustering model: -mutable struct DummyClusterer <: Unsupervised - n::Int -end -DummyClusterer(; n=3) = DummyClusterer(n) -function MLJBase.fit(model::DummyClusterer, verbosity::Int, X) - Xmatrix = Tables.matrix(X) - n = min(size(Xmatrix, 2), model.n) - centres = Xmatrix[1:n, :] - levels = categorical(1:n) - report = (centres=centres,) - fitresult = levels - return fitresult, nothing, report -end -MLJBase.transform(model::DummyClusterer, fitresult, Xnew) = - selectcols(Xnew, 1:length(fitresult)) -MLJBase.predict(model::DummyClusterer, fitresult, Xnew) = - [fill(fitresult[1], nrows(Xnew))...] 
- -# A wrap of above model: -mutable struct WrappedDummyClusterer <: UnsupervisedComposite - model -end -WrappedDummyClusterer(; model=DummyClusterer()) = - WrappedDummyClusterer(model) - -@testset "third test of hand-exported network" begin - function MLJBase.fit(model::WrappedDummyClusterer, verbosity::Int, X) - Xs = source(X) - W = transform(machine(OneHotEncoder(), Xs), Xs) - m = machine(model.model, W) - yhat = predict(m, W) - Wout = transform(m, W) - foo = node(η -> first(η), yhat) - mach = machine(Unsupervised(), - Xs; - predict=yhat, - transform=Wout, - report=(foo=foo,)) - return!(mach, model, verbosity; depwarn) - end - X, _ = make_regression(10, 5); - model = WrappedDummyClusterer(model=DummyClusterer(n=2)) - mach = fit!(machine(model, X), verbosity=0) - model.model.n = 3 - fit!(mach, verbosity=0) - @test transform(mach, X) == selectcols(X, 1:3) - r = report(mach) - @test r.model.centres == MLJBase.matrix(X)[1:3,:] - @test r.foo == predict(mach, rows=:)[1] - fp = fitted_params(mach) - @test :model in keys(fp) - levs = fp.model.fitresult - @test predict(mach, X) == fill(levs[1], 10) -end - - -## NETWORK WITH MULTIPLE NODES REPORTING STATE/ REFIT - -mutable struct TwoStages <: DeterministicComposite - model1 - model2 - model3 -end - -function MLJBase.fit(m::TwoStages, verbosity, X, y) - Xs = source(X) - ys = source(y) - mach1 = machine(m.model1, Xs, ys) - mach2 = machine(m.model2, Xs, ys) - ypred1 = MLJBase.predict(mach1, Xs) - ypred2 = MLJBase.predict(mach2, Xs) - Y = MLJBase.table(hcat(ypred1, ypred2)) - mach3 = machine(m.model3, Y, ys) - ypred3 = MLJBase.predict(mach3, Y) - μpred = node(x->mean(x), ypred3) - σpred = node((x, μ)->mean((x.-μ).^2), ypred3, μpred) - mach = machine(Deterministic(), - Xs, - ys; - predict=ypred3, - report=(μpred=μpred, - σpred=σpred)) - return!(mach, m, verbosity; depwarn) -end - -@testset "Test exported-network with multiple saved nodes and refit" begin - X, y = make_regression(100, 3) - model3 = FooBarRegressor(lambda=1) - 
twostages = TwoStages(FooBarRegressor(lambda=0.1), - FooBarRegressor(lambda=10), model3) - mach = machine(twostages, X, y) - fit!(mach, verbosity=0) - rep = report(mach) - # All machines have been fitted once - @test rep.machines[1].state == - rep.machines[2].state == - rep.machines[3].state == 1 - # Retrieve current values of interest - μpred = rep.μpred - σpred = rep.σpred - # Change model3 and refit - model3.lambda = 10 - fit!(mach, verbosity=0) - rep = report(mach) - # Machines 1,2 have been fitted once and machine 3 twice - @test rep.machines[1].state == rep.machines[2].state == 1 - @test rep.machines[3].state == 2 - # The new values have been updated - @test rep.μpred != μpred - @test rep.σpred != σpred -end - -## COMPOSITE WITH COMPONENT MODELS STORED IN NTUPLE - -# `modelnames` is a tuple of `Symbol`s, one for each `model` in `models`: -mutable struct Averager{modelnames} <: DeterministicComposite - models::NTuple{<:Any,Deterministic} - weights::Vector{Float64} - Averager(modelnames, models, weights) = - new{modelnames}(models, weights) -end - -# special kw constructor, allowing one to specify the property names -# to be attributed to each component model (see below): -function Averager(; weights=Float64[], named_models...) - nt = NamedTuple(named_models) - modelnames = keys(nt) - models = values(nt) - return Averager(modelnames, models, weights) -end - -# for example: -averager = Averager(weights=[1, 1], - model1=KNNRegressor(K=3), - model2=RidgeRegressor()) - -# so we can do `averager.model1` and `averager.model2`: -Base.propertynames(::Averager{modelnames}) where modelnames = - tuple(:weights, modelnames...) 
-function Base.getproperty(averager::Averager{modelnames}, - name::Symbol) where modelnames - name === :weights && return getfield(averager, :weights) - models = getfield(averager, :models) - for j in eachindex(modelnames) - name === modelnames[j] && return models[j] - end - error("type Averager has no field $name") -end - -# overload multiplication of a node by a matrix: -import Base.* -*(preds::Node, weights) = node(p->p*weights, preds) - -# learning network wrapped in a fit method: -function MLJBase.fit(averager::Averager{modelnames}, - verbosity, - X, - y) where modelnames - - Xs = source(X) - ys = source(y) - - weights = averager.weights - - machines = [machine(getproperty(averager, name), Xs, ys) for - name in modelnames] - predictions = hcat([predict(mach, Xs) for mach in machines]...) - yhat = (1/sum(weights))*(predictions*weights) - - mach = machine(Deterministic(), Xs, ys; predict=yhat) - return!(mach, averager, verbosity; depwarn) -end - -@testset "composite with component models stored in ntuple" begin - X, y = make_regression(10, 3); - mach = machine(averager, X, y) - fit!(mach, verbosity=0) - fp = fitted_params(mach) - @test keys(fp.model1) == (:tree, ) - @test keys(fp.model2) == (:coefficients, :intercept) - r = report(mach) - @test isnothing(r.model1) - @test isnothing(r.model2) - range(averager, :(model1.K), lower=2, upper=3) -end - - -## DATA FRONT-END IN AN EXPORTED LEARNING NETWORK - -mutable struct Scale <: MLJBase.Static - scaling::Float64 -end - -function MLJBase.transform(s::Scale, _, X) - X isa AbstractVecOrMat && return X * s.scaling - MLJBase.table(s.scaling * MLJBase.matrix(X), prototype=X) -end - -function MLJBase.inverse_transform(s::Scale, _, X) - X isa AbstractVecOrMat && return X / s.scaling - MLJBase.table(MLJBase.matrix(X) / s.scaling, prototype=X) -end - -mutable struct ElephantModel <: ProbabilisticComposite - scaler - clf - cache::Bool -end - -function MLJBase.fit(model::ElephantModel, verbosity, X, y) - - Xs = source(X) - ys = 
source(y) - - scaler = model.scaler - mach1 = machine(scaler, cache=model.cache) - W = transform(mach1, Xs) - - # a classifier with reformat front-end: - clf = model.clf - mach2 = machine(clf, W, ys, cache=model.cache) - yhat = predict(mach2, W) - - mach = machine(Probabilistic(), Xs, ys, predict=yhat) - return!(mach, model, verbosity; depwarn) -end - -@testset "reformat/selectrows logic in composite model" begin - - X = (x1=ones(5), x2=ones(5)) - y = categorical(collect("abaaa")) - model = ElephantModel(Scale(2.0), - ConstantClassifier(testing=true, bogus=1.0), - true) - mach = machine(model, X, y, cache=false) - - @test_logs((:warn, MLJBase.WARN_NETWORK_MACHINES_DEPRECATION), - (:info, "reformatting X, y"), - (:info, "resampling X, y"), - fit!(mach, verbosity=0, rows=1:3) - ) - @test mach.state == 1 - - # new clf hyperparmater (same rows) means no reformatting or resampling: - model.clf.bogus = 10 - @test_logs fit!(mach, verbosity=0, rows=1:3) - @test mach.state == 2 - - # however changing an upstream hyperparameter forces reformatting - # and resampling: - model.scaler.scaling = 3.1 - @test_logs((:info, "reformatting X, y"), - (:info, "resampling X, y"), - fit!(mach, verbosity=0, rows=1:3)) - -end - -@testset "operation nodes that are source nodes" begin - - mutable struct BananaComposite <: UnsupervisedComposite - stand - end - BananaComposite(; stand=Standardizer()) = BananaComposite(stand) - - function MLJBase.fit(model::BananaComposite, verbosity, X) - - Xs = source(X) - mach1 = machine(model.stand, Xs) - X2 = transform(mach1, Xs) - - # node for the inverse_transform: - - network_mach = machine(Unsupervised(), Xs, transform=X2, inverse_transform=Xs) - return!(network_mach, model, verbosity; depwarn) - - end - - X = (x = Float64[1, 2, 3],) - mach = machine(BananaComposite(), X) - fit!(mach, verbosity=0, force=true) - @test transform(mach, X).x ≈ Float64[-1, 0, 1] - @test inverse_transform(mach, X) == X - -end - -end # module -true diff --git 
a/test/composition/models/network_composite.jl b/test/composition/models/network_composite.jl index 87e064df..26f0d4c6 100644 --- a/test/composition/models/network_composite.jl +++ b/test/composition/models/network_composite.jl @@ -1,4 +1,4 @@ -module TestNetowrkComposite +module TestNetoworkComposite using Test using MLJBase @@ -9,6 +9,7 @@ using Tables using MLJModelInterface using CategoricalArrays using OrderedCollections +using StatisticalMeasures using Serialization const MMI = MLJModelInterface @@ -645,6 +646,39 @@ end end +# # STATIC MODEL WITH MULTIPLE INPUTS + +mutable struct Balancer <: Static end +MLJBase.transform(::Balancer, _, X, y) = (selectrows(X, 1:2), selectrows(y, 1:2)) + +struct ThinWrapper <: StaticNetworkComposite + balancer +end + +function MLJBase.prefit(wrapper::ThinWrapper, verbosity) + + data = source() # empty source because there is no training data + Xs = first(data) + ys = last(data) + + mach=machine(:balancer) + + output = transform(mach, Xs, ys) + + (; transform = output) + +end + +balancer = Balancer() +wrapper = ThinWrapper(balancer) + +X, y = make_blobs() +mach = machine(wrapper) +Xunder, yunder = transform(mach, X, y) +@test Xunder == selectrows(X, 1:2) +@test yunder == selectrows(y, 1:2) + + # # MACHINE INTEGRATION TESTS @@ -795,7 +829,7 @@ end # Test data as been erased at the first and second level of composition for submach in machines(glb(smach.fitresult)) TestUtilities.test_data(submach) - if submach isa Machine{<:Composite} + if submach isa Machine{<:NetworkComposite} for subsubmach in machines(glb(submach.fitresult)) TestUtilities.test_data(subsubmach) end diff --git a/test/composition/models/pipelines.jl b/test/composition/models/pipelines.jl index 8fb793ee..faaf3d4c 100644 --- a/test/composition/models/pipelines.jl +++ b/test/composition/models/pipelines.jl @@ -113,7 +113,6 @@ end @test_logs @test Pipeline(m, t, u, d, u) isa DeterministicPipeline # named components: - @test_throws MLJBase.ERR_USING_TARGET_KWARG 
Pipeline(target=u) @test Pipeline(c1=m, c2=t, c3=u) isa UnsupervisedPipeline @test Pipeline(c1=m, c2=t, c3=u, c5=p) isa ProbabilisticPipeline @test Pipeline(c1=m, c2=t) isa StaticPipeline diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl index 6cbe6588..ca973775 100644 --- a/test/composition/models/stacking.jl +++ b/test/composition/models/stacking.jl @@ -2,11 +2,11 @@ module TestStacking using Test using MLJBase +using StatisticalMeasures using MLJModelInterface using ..Models using Random using StableRNGs - import Distributions rng = StableRNGs.StableRNG(1234) @@ -31,7 +31,7 @@ function test_internal_evaluation(internalreport, std_evaluation, modelnames) @test model_ev isa PerformanceEvaluation @test model_ev.per_fold == std_ev.per_fold @test model_ev.measurement == std_ev.measurement - @test model_ev.per_observation[1] === std_ev.per_observation[1] === missing + @test model_ev.per_observation[1] == std_ev.per_observation[1] @test model_ev.per_observation[2] == std_ev.per_observation[2] @test model_ev.operation == std_ev.operation @test model_ev.report_per_fold == std_ev.report_per_fold diff --git a/test/composition/models/static_transformers.jl b/test/composition/models/static_transformers.jl index c0162950..072dcbca 100644 --- a/test/composition/models/static_transformers.jl +++ b/test/composition/models/static_transformers.jl @@ -5,6 +5,7 @@ using Test using MLJBase using ..Models using CategoricalArrays +using StatisticalMeasures import Random.seed! 
seed!(1234) diff --git a/test/composition/models/transformed_target_model.jl b/test/composition/models/transformed_target_model.jl index 12b1391b..b640f922 100644 --- a/test/composition/models/transformed_target_model.jl +++ b/test/composition/models/transformed_target_model.jl @@ -18,10 +18,6 @@ whitener = UnivariateStandardizer() TransformedTargetModel(atom), ) @test_logs TransformedTargetModel(atom, transformer=UnivariateStandardizer) - model = @test_logs( - (:warn, MLJBase.WARN_TARGET_DEPRECATED), - TransformedTargetModel(atom, target=whitener), - ) model = @test_logs TransformedTargetModel(atom, transformer=whitener) @test model.model == atom @test model.inverse == nothing diff --git a/test/default_measures.jl b/test/default_measures.jl new file mode 100644 index 00000000..28a28b5d --- /dev/null +++ b/test/default_measures.jl @@ -0,0 +1,42 @@ +mutable struct DRegressor <: Deterministic end +MLJBase.target_scitype(::Type{<:DRegressor}) = + AbstractVector{<:Union{Missing,Continuous}} + +mutable struct D2Regressor <: Deterministic end +MLJBase.target_scitype(::Type{<:D2Regressor}) = + AbstractVector{<:Union{Missing,Continuous}} + +mutable struct DClassifier <: Deterministic end +MLJBase.target_scitype(::Type{<:DClassifier}) = + AbstractVector{<:Union{Missing,Finite}} + +mutable struct DClassifierWeird <: Deterministic end +MLJBase.target_scitype(::Type{<:DClassifierWeird}) = + AbstractVector{<:Textual} + +mutable struct PClassifier <: Probabilistic end +MLJBase.target_scitype(::Type{<:PClassifier}) = + AbstractVector{<:Union{Missing,Finite}} + +mutable struct PRegressor <: Probabilistic end +MLJBase.target_scitype(::Type{<:PRegressor}) = + AbstractVector{<:Union{Missing,Continuous}} + +mutable struct PCountRegressor <: Probabilistic end +MLJBase.target_scitype(::Type{<:PCountRegressor}) = + AbstractVector{<:Union{Missing,Count}} + + + +@testset "default_measure" begin + @test MLJBase.default_measure(DRegressor()) == l2 + @test 
MLJBase.default_measure(D2Regressor()) == l2 + @test MLJBase.default_measure(DClassifier()) == misclassification_rate + @test MLJBase.default_measure(PClassifier()) == log_loss + @test MLJBase.default_measure(PRegressor()) == log_loss + @test MLJBase.default_measure(PCountRegressor()) == log_loss + @test isnothing(MLJBase.default_measure(DClassifierWeird())) + @test isnothing(MLJBase.default_measure("junk")) +end + +true diff --git a/test/hyperparam/one_dimensional_ranges.jl b/test/hyperparam/one_dimensional_ranges.jl index 5f7506ea..1567f91e 100644 --- a/test/hyperparam/one_dimensional_ranges.jl +++ b/test/hyperparam/one_dimensional_ranges.jl @@ -116,5 +116,16 @@ end range(any1, :any, lower=1, upper=10)) end +@testset "coverage" begin + io = IOBuffer() + r1 = range(Int, :junk, lower=1, upper=10) + r2 = range(Char, :junk, values=['c', 'd']) + show(io, r1) + @test String(take!(io)) == "NumericRange(1 ≤ junk ≤ 10; origin=5.5, unit=4.5)" + show(io, r2) + @test String(take!(io)) == "NominalRange(junk = c, d)" + close(io) +end + end true diff --git a/test/interface/model_api.jl b/test/interface/model_api.jl index 9bf3e0bf..8966f70f 100644 --- a/test/interface/model_api.jl +++ b/test/interface/model_api.jl @@ -2,6 +2,7 @@ module TestModelAPI using Test using MLJBase +using StatisticalMeasures import MLJModelInterface using ..Models using Distributions @@ -77,7 +78,7 @@ UnivariateFiniteFitter(;alpha=1.0) = UnivariateFiniteFitter(alpha) yhat = predict(mach, nothing) # single UnivariateFinite distribution @test cross_entropy(fill(yhat, 3), ytest) ≈ - [-log(1/2), -log(1/2), -log(1/4)] + mean([-log(1/2), -log(1/2), -log(1/4)]) end diff --git a/test/machines.jl b/test/machines.jl index 16655d26..7d0845c2 100644 --- a/test/machines.jl +++ b/test/machines.jl @@ -7,6 +7,7 @@ using ..Models using StableRNGs using Serialization using ..TestUtilities +using StatisticalMeasures const MLJModelInterface = MLJBase.MLJModelInterface const MMI = MLJModelInterface diff --git 
a/test/measures/confusion_matrix.jl b/test/measures/confusion_matrix.jl deleted file mode 100644 index 3e7d9b7f..00000000 --- a/test/measures/confusion_matrix.jl +++ /dev/null @@ -1,116 +0,0 @@ -using Test -using MLJBase -include(joinpath("..", "..", "test", "_models", "models.jl")) -using .Models - -@testset "_categorical" begin - a = [1, 1, 2, 3] - b = [3, 3, 4, 5] - c = [missing, a...] - d = [missing, b...] - e = categorical(a) - f = categorical(b) - g = categorical(c) - h = categorical(d) - j = CategoricalArrays.CategoricalValue{Int64, UInt32}[e[1], e[1], e[1], e[1]] - k = CategoricalArrays.CategoricalValue{Int64, UInt32}[e[4], e[4], e[4], e[4]] - rhs = (Set(1:5), Set(1:5)) - @test Set.(levels.(MLJBase._categorical(a, b))) == rhs - @test Set.(levels.(MLJBase._categorical(a, d))) == rhs - @test Set.(levels.(MLJBase._categorical(c, b))) == rhs - @test Set.(levels.(MLJBase._categorical(c, d))) == rhs - @test Set.(levels.(MLJBase._categorical(a, f))) == rhs - @test Set.(levels.(MLJBase._categorical(a, h))) == rhs - @test Set.(levels.(MLJBase._categorical(b, a))) == rhs - @test Set.(levels.(MLJBase._categorical(d, a))) == rhs - @test Set.(levels.(MLJBase._categorical(b, c))) == rhs - @test Set.(levels.(MLJBase._categorical(d, c))) == rhs - @test Set.(levels.(MLJBase._categorical(f, a))) == rhs - @test Set.(levels.(MLJBase._categorical(h, a))) == rhs - - @test Set.(levels.(MLJBase._categorical(j, k))) == (Set(1:3), Set(1:3)) - - # case of ordinary vector with CategoricalValue eltype: - acv = CategoricalArrays.CategoricalVector -end - -@testset "basics" begin - yraw = ['m', 'm', 'f', 'n', missing, 'f', 'm', 'n', 'n', 'm', 'f'] - ŷraw = [missing, 'f', 'f', 'm', 'f', 'f', 'n', 'm', 'n', 'm', 'f'] - y = categorical(yraw) - ŷ = categorical(ŷraw) - l = levels(y) # f, m, n - cm = MLJBase._confmat(ŷ, y; warn=false) - ŷ_clean, y_clean = MLJBase.skipinvalid(ŷ, y) - ee(l,i,j) = sum((ŷ_clean .== l[i]) .& (y_clean .== l[j])) - for i in 1:3, j in 1:3 - @test cm[i,j] == ee(l,i,j) - 
end - - cm2 = @test_logs (:warn, r"The classes are") MLJBase._confmat(ŷraw, yraw) - @test cm2.mat == cm.mat - - perm = [3, 1, 2] - l2 = l[perm] - cm2 = @test_logs MLJBase._confmat(ŷ, y; perm=perm) - m = ConfusionMatrix(perm=perm) - for i in 1:3, j in 1:3 - @test cm2[i,j] == ee(l2,i,j) - end - @test_logs (:warn, r"The classes are un") MLJBase._confmat(ŷ, y) - ŷc = coerce(ŷ, Union{Missing,OrderedFactor}) - yc = coerce(y, Union{Missing,OrderedFactor}) - @test MLJBase._confmat(ŷc, yc).mat == cm.mat - - y = categorical(['a','b','a','b']) - ŷ = categorical(['b','b','a','a']) - @test_logs (:warn, r"The classes are un") MLJBase._confmat(ŷ, y) - - # more tests for coverage - y = categorical([1,2,3,1,2,3,1,2,3]) - ŷ = categorical([1,2,3,1,2,3,1,2,3]) - @test_throws ArgumentError MLJBase._confmat(ŷ, y, rev=true) - - # silly test for display - ŷ = coerce(y, OrderedFactor) - y = coerce(y, OrderedFactor) - iob = IOBuffer() - Base.show(iob, MIME("text/plain"), MLJBase._confmat(ŷ, y)) - siob = String(take!(iob)) - @test strip(siob) == strip(""" - ┌──────────────┐ - │ Ground Truth │ - ┌─────────┼────┬────┬────┤ - │Predicted│ 1 │ 2 │ 3 │ - ├─────────┼────┼────┼────┤ - │ 1 │ 3 │ 0 │ 0 │ - ├─────────┼────┼────┼────┤ - │ 2 │ 0 │ 3 │ 0 │ - ├─────────┼────┼────┼────┤ - │ 3 │ 0 │ 0 │ 3 │ - └─────────┴────┴────┴────┘""") -end - -@testset "ConfusionMatrix measure" begin - - @test info(confmat).orientation == :other - model = DeterministicConstantClassifier() - - X = (x=rand(10),) - long = categorical(collect("abbaacaabbbbababcbac"), ordered=true) - y = long[1:10] - yhat =long[11:20] - - @test confmat(yhat, y).mat == [1 2 0; 3 1 1; 1 1 0] - @test ConfusionMatrix(perm=[2, 1, 3])(yhat, y).mat == - MLJBase._confmat(yhat, y, perm=[2, 1, 3]).mat - - MLJBase.value(confmat, yhat, X, y, nothing) - - e = evaluate(model, X, y, - measures=[misclassification_rate, confmat], - resampling=Holdout(fraction_train=0.5)) - cm = e.measurement[2] - @test cm.labels == ["a", "b", "c"] - @test cm.mat == [2 2 1; 0 
0 0; 0 0 0] -end diff --git a/test/measures/continuous.jl b/test/measures/continuous.jl deleted file mode 100644 index 3e645845..00000000 --- a/test/measures/continuous.jl +++ /dev/null @@ -1,31 +0,0 @@ -rng = StableRNG(666899) - -@testset "regressor measures" begin - y = [1, 42, 2, 3, missing, 4] - yhat = [4, NaN, 3, 2, 42, 1] - w = [1, 42, 2, 4, 42, 3] - y = [1, 2, 3, 4] - yhat = [4, 3, 2, 1] - w = [1, 2, 4, 3] - @test isapprox(mae(yhat, y), 2) - @test isapprox(mae(yhat, y, w), (1*3 + 2*1 + 4*1 + 3*3)/4) - @test isapprox(rms(yhat, y), sqrt(5)) - @test isapprox(rms(yhat, y, w), sqrt((1*3^2 + 2*1^2 + 4*1^2 + 3*3^2)/4)) - @test rsq(yhat, y) == -3 - @test isapprox(mean(skipinvalid(l1(yhat, y))), 2) - @test isapprox(mean(skipinvalid(l1(yhat, y, w))), mae(yhat, y, w)) - @test isapprox(mean(skipinvalid(l2(yhat, y))), 5) - @test isapprox(mean(skipinvalid(l2(yhat, y, w))), rms(yhat, y, w)^2) - @test isapprox(mean(skipinvalid(log_cosh(yhat, y))), 1.3715546675) - - y = [1, 42, 2, 3, missing, 4] - yhat = [2, NaN, 3, 4, 42, 5] - @test isapprox(rmsl(yhat, y), - sqrt((log(1/2)^2 + log(2/3)^2 + log(3/4)^2 + log(4/5)^2)/4)) - @test isapprox(rmslp1(yhat, y), - sqrt((log(2/3)^2 + log(3/4)^2 + log(4/5)^2 + log(5/6)^2)/4)) - @test isapprox(rmsp(yhat, y), sqrt((1 + 1/4 + 1/9 + 1/16)/4)) - @test isapprox(mape(yhat, y), (1/1 + 1/2 + 1/3 + 1/4)/4) -end - -true diff --git a/test/measures/doc_strings.jl b/test/measures/doc_strings.jl deleted file mode 100644 index 1cbf96c4..00000000 --- a/test/measures/doc_strings.jl +++ /dev/null @@ -1,9 +0,0 @@ -using MLJBase - -docstring = (Base.Docs.doc)((Base.Docs.Binding)(Main, :multiclass_recall)) - -@test string(docstring) == "An instance of type "* - "[`MulticlassTruePositiveRate`](@ref). Query the "* - "[`MulticlassTruePositiveRate`](@ref) doc-string for details. 
\n" - -true diff --git a/test/measures/finite.jl b/test/measures/finite.jl deleted file mode 100644 index f06266c3..00000000 --- a/test/measures/finite.jl +++ /dev/null @@ -1,609 +0,0 @@ -rng = StableRNG(51803) - -const Vec = AbstractVector - -@testset "misclassification_rate" begin - y = categorical(collect("asdfasdfaaassdd")) - yhat = categorical(collect("asdfaadfaasssdf")) - w = 1:15 - ym = vcat(y, [missing,]) - yhatm = vcat(yhat, [missing,]) - wm = 1:16 - @test misclassification_rate(yhat, y) ≈ 0.2 - @test misclassification_rate(yhatm, ym) ≈ 0.2 - @test misclassification_rate(yhat, y, w) ≈ (6*1 + 11*1 + 15*1) / 15 - @test misclassification_rate(yhatm, ym, wm) ≈ (6*1 + 11*1 + 15*1) / 15 -end - -@testset "mcr, acc, bacc, mcc" begin - y = categorical(['m', 'f', 'n', 'f', 'm', 'n', 'n', 'm', 'f']) - ŷ = categorical(['f', 'f', 'm', 'f', 'n', 'm', 'n', 'm', 'f']) - @test accuracy(ŷ, y) == 1-mcr(ŷ,y) == - accuracy(MLJBase._confmat(ŷ, y, warn=false)) == - 1-mcr(MLJBase._confmat(ŷ, y, warn=false)) - w = randn(rng,length(y)) - @test accuracy(ŷ, y, w) == 1-mcr(ŷ,y,w) - - ## balanced accuracy - y = categorical([ - 3, 4, 1, 1, 1, 4, 1, 3, 3, 1, 2, 3, 1, 3, 3, 3, 2, 4, 3, 2, 1, 3, - 3, 1, 1, 1, 2, 4, 1, 4, 4, 4, 1, 1, 4, 4, 3, 1, 2, 2, 3, 4, 2, 1, - 2, 2, 3, 2, 2, 3, 1, 2, 3, 4, 1, 2, 4, 2, 1, 4, 3, 2, 3, 3, 3, 1, - 3, 1, 4, 3, 1, 2, 3, 1, 2, 2, 4, 4, 1, 3, 2, 1, 4, 3, 3, 1, 3, 1, - 2, 2, 2, 2, 2, 3, 2, 1, 1, 4, 2, 2]) - ŷ = categorical([ - 2, 3, 2, 1, 2, 2, 3, 3, 2, 4, 2, 3, 2, 4, 3, 4, 4, 2, 1, 3, 3, 3, - 3, 3, 2, 4, 4, 3, 4, 4, 1, 2, 3, 2, 4, 1, 2, 3, 1, 4, 2, 2, 1, 2, - 3, 2, 2, 4, 3, 2, 2, 2, 1, 2, 2, 1, 3, 1, 4, 1, 2, 1, 2, 4, 3, 2, - 4, 3, 2, 4, 4, 2, 4, 3, 2, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 3, 4, 2, - 4, 4, 2, 1, 3, 2, 2, 4, 1, 1, 4, 1]) - w = [ - 0.5, 1.4, 0.6, 1. , 0.1, 0.5, 1.2, 0.2, 1.8, 0.3, 0.6, 2.2, 0.1, - 1.4, 0.2, 0.4, 0.6, 2.1, 0.7, 0.2, 0.9, 0.4, 0.7, 0.3, 0.1, 1.7, - 0.2, 0.7, 1.2, 1. , 0.9, 0.4, 0.5, 0.5, 0.5, 1. , 0.3, 0.1, 0.2, - 0. 
, 2.2, 0.8, 0.9, 0.8, 1.3, 0.2, 0.4, 0.7, 1. , 0.7, 1.7, 0.7, - 1.1, 1.8, 0.1, 1.2, 1.8, 1. , 0.1, 0.5, 0.6, 0.7, 0.6, 1.2, 0.6, - 1.2, 0.5, 0.5, 0.8, 0.2, 0.6, 1. , 0.3, 1. , 0.2, 1.1, 1.1, 1.1, - 0.6, 1.4, 1.2, 0.3, 1.1, 0.2, 0.5, 1.6, 0.3, 1. , 0.3, 0.9, 0.9, - 0. , 0.6, 0.6, 0.4, 0.5, 0.4, 0.2, 0.9, 0.4] - sk_bacc = 0.17493386243386244 # note: sk-learn reverses ŷ and y - @test bacc(ŷ, y) ≈ sk_bacc - sk_adjusted_bacc = -0.10008818342151675 - @test BalancedAccuracy(adjusted=true)(ŷ, y) ≈ sk_adjusted_bacc - sk_bacc_w = 0.1581913163016446 - @test bacc(ŷ, y, w) ≈ sk_bacc_w - sk_adjusted_bacc_w = -0.1224115782644738 - @test BalancedAccuracy(adjusted=true)(ŷ, y, w) ≈ sk_adjusted_bacc_w - - ## matthews correlation - sk_mcc = -0.09759509982785947 - @test mcc(ŷ, y) == matthews_correlation(ŷ, y) ≈ sk_mcc - # invariance with respect to permutation ? - cm = MLJBase._confmat(ŷ, y, perm=[3, 1, 2, 4]) - @test mcc(cm) ≈ sk_mcc - - # Issue #381 - cm = MLJBase.ConfusionMatrixObject([29488 13017; 12790 29753], ["0.0", "1.0"]) - @test mcc(cm) ≈ 0.39312321239417797 -end - -@testset "kappa" begin - # Binary case - y_b = categorical([2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2]) - ŷ_b = categorical([1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2]) - cm_b = MLJBase._confmat(y_b, ŷ_b, warn=false) - p0_b = (4+10)/30 - pe_b = (13*11 + 17*19)/(30*30) - - # Multiclass case - y_m = categorical([5, 5, 3, 5, 4, 4, 2, 2, 3, 2, 5, 2, 4, 3, 2, 1, 1, 5, 1, 4, 2, 5, 4, 5, 2, 3, 3, 4, 2, 4]) - ŷ_m = categorical([1, 1, 1, 5, 4, 2, 1, 3, 4, 4, 2, 5, 4, 4, 1, 5, 5, 2, 3, 3, 1, 3, 2, 5, 5, 2, 3, 2, 5, 3]) - cm_m = MLJBase._confmat(ŷ_m, y_m, warn=false) - p0_m = 5/30 - pe_m = (3*6 + 8*6 + 5*6 + 7*5 + 7*7)/(30*30) - - # Tests - @test kappa(y_m, ŷ_m) ≈ (p0_m - pe_m)/(1 - pe_m) - @test kappa(y_b, ŷ_b) ≈ (p0_b - pe_b)/(1 - pe_b) - @test kappa(cm_m) == kappa(y_m, ŷ_m) - @test kappa(cm_b) == kappa(y_b, ŷ_b) - @test 
kappa(ŷ_m, y_m) == kappa(y_m, ŷ_m) - @test kappa(ŷ_b, y_b) == kappa(y_b, ŷ_b) - @test kappa(y_m, y_m) == 1.0 - @test kappa(y_b, y_b) == 1.0 -end - -@testset "confusion matrix {2}" begin - # first class is 1 is assumed negative, second positive - y = categorical([1, 2, 1, 2, 1, 1, 2]) - ŷ = categorical([1, 2, 2, 2, 2, 1, 2]) - cm = MLJBase._confmat(ŷ, y, warn=false) - TN = sum(ŷ .== y .== 1) # pred and true = - (1) - TP = sum(ŷ .== y .== 2) # pred and true = + (2) - FP = sum(ŷ .!= y .== 1) # pred + (2) and true - (1) - FN = sum(ŷ .!= y .== 2) # pred - (1) and true + (2) - @test cm[1,1] == TN - @test cm[2,2] == TP - @test cm[1,2] == FN - @test cm[2,1] == FP - - ym = categorical([1, missing, 2, 1, 2, 1, 1, 1, 2]) - ŷm = categorical([1, 2, 2, 2, 2, missing, 2, 1, 2]) - cm = MLJBase._confmat(ŷ, y, warn=false) - TN = sum(skipmissing(ŷ .== y .== 1)) # pred and true = - (1) - TP = sum(skipmissing(ŷ .== y .== 2)) # pred and true = + (2) - FP = sum(skipmissing(ŷ .!= y .== 1)) # pred + (2) and true - (1) - FN = sum(skipmissing(ŷ .!= y .== 2)) # pred - (1) and true + (2) - @test cm[1,1] == TN - @test cm[2,2] == TP - @test cm[1,2] == FN - @test cm[2,1] == FP - - cm2 = MLJBase._confmat(ŷ, y; rev=true) - @test cm2[1,1] == cm[2,2] - @test cm2[1,2] == cm[2,1] - @test cm2[2,2] == cm[1,1] - @test cm2[2,1] == cm[1,2] - - @test accuracy(ŷ, y) == accuracy(cm) == sum(y .== ŷ) / length(y) - - @test @test_logs((:warn, r"The classes are un-ordered"), - recall(ŷ, y) == TP / (TP + FN)) - - ŷ = coerce(ŷ, Union{Missing,OrderedFactor}) - y = coerce(y, Union{Missing,OrderedFactor}) - - @test precision(ŷ, y) == TP / (TP + FP) - @test specificity(ŷ, y) == TN / (TN + FP) - @test f1score(ŷ, y) ≈ - 2.0 / (1.0 / recall(ŷ, y) + 1.0 / precision(ŷ, y)) - - recall_rev = Recall(rev=true) - @test recall_rev(ŷ, y) == - TN / (TN + FP) # no warning because rev is specified - precision_rev = Precision(rev=true) - @test precision_rev(ŷ, y) == TN / (TN + FN) - specificity_rev = Specificity(rev=true) - @test 
specificity_rev(ŷ, y) == TP / (TP + FN) - f1score_rev = FScore(rev=true) - @test f1score_rev(ŷ, y) ≈ - 2.0 / (1.0 / recall_rev(ŷ, y) + 1.0 / precision_rev(ŷ, y)) -end - -@testset "confusion matrix {n}" begin - y = coerce([1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 2, 1, 2, - 2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, - 2, 2, 2], Multiclass) - ŷ = coerce([2, 0, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 2, - 1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 2, - 1, 2, 2], Multiclass) - class_w = Dict(0=>0,2=>2,1=>1) - cm = MLJBase._confmat(ŷ, y, warn=false) - - # ┌─────────────────────────────────────────┐ - # │ Ground Truth │ - # ┌─────────────┼─────────────┬─────────────┬─────────────┤ - # │ Predicted │ 0 │ 1 │ 2 │ - # ├─────────────┼─────────────┼─────────────┼─────────────┤ - # │ 0 │ 1 │ 1 │ 2 │ - # ├─────────────┼─────────────┼─────────────┼─────────────┤ - # │ 1 │ 2 │ 4 │ 4 │ - # ├─────────────┼─────────────┼─────────────┼─────────────┤ - # │ 2 │ 1 │ 6 │ 8 │ - # └─────────────┴─────────────┴─────────────┴─────────────┘ - - cm_tp = [1; 4; 8] - cm_tn = [22; 12; 8] - cm_fp = [1+2; 2+4; 1+6] - cm_fn = [2+1; 1+6; 2+4] - cm_prec = cm_tp ./ ( cm_tp + cm_fp ) - cm_rec = cm_tp ./ ( cm_tp + cm_fn ) - - # Check if is positive - m = MulticlassTruePositive(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tp - m = MulticlassTrueNegative(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tn - m = MulticlassFalsePositive(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_fp - m = MulticlassFalseNegative(;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_fn - - # Check if is in [0,1] - m = MulticlassTruePositiveRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tp ./ (cm_fn.+cm_tp) <= [1; 1; 1] - m = MulticlassTrueNegativeRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == cm_tn ./ (cm_tn.+cm_fp) <= [1; 1; 1] - m = MulticlassFalsePositiveRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == 1 .- cm_tn ./ (cm_tn.+cm_fp) <= [1; 1; 1] - m = 
MulticlassFalseNegativeRate(average=no_avg;return_type=Vector) - @test [0; 0; 0] <= m(ŷ, y) == 1 .- cm_tp ./ (cm_fn.+cm_tp) <= [1; 1; 1] - - #`no_avg` and `LittleDict` - @test collect(values(MulticlassPrecision(average=no_avg)(cm))) ≈ - collect(values(MulticlassPrecision(average=no_avg)(ŷ, y))) ≈ - cm_prec - @test MulticlassPrecision(average=macro_avg)(cm) ≈ - MulticlassPrecision(average=macro_avg)(ŷ, y) ≈ mean(cm_prec) - @test collect(keys(MulticlassPrecision(average=no_avg)(cm))) == - collect(keys(MulticlassPrecision(average=no_avg)(ŷ, y))) == - ["0"; "1"; "2"] - @test collect(values(MulticlassRecall(average=no_avg)(cm))) ≈ - collect(values(MulticlassRecall(average=no_avg)(ŷ, y))) ≈ - cm_rec - @test collect(values(MulticlassFScore(average=no_avg)(cm))) ≈ - collect(values(MulticlassFScore(average=no_avg)(ŷ, y))) ≈ - 2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) - - #`no_avg` and `LittleDict` with class weights - @test collect(values(MulticlassPrecision(average=no_avg)(cm, class_w))) ≈ - collect(values(MulticlassPrecision(average=no_avg)(ŷ, y, class_w))) ≈ - cm_prec .* [0; 1; 2] - @test collect(values(MulticlassRecall(average=no_avg)(cm, class_w))) ≈ - collect(values(MulticlassRecall(average=no_avg)(ŷ, y, class_w))) ≈ - cm_rec .* [0; 1; 2] - @test collect(values(MulticlassFScore(average=no_avg)(cm, class_w))) ≈ - collect(values(MulticlassFScore(average=no_avg)(ŷ, y, class_w))) ≈ - 2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0; 1; 2] - - #`macro_avg` and `LittleDict` - macro_prec = MulticlassPrecision(average=macro_avg) - macro_rec = MulticlassRecall(average=macro_avg) - - @test macro_prec(cm) ≈ macro_prec(ŷ, y) ≈ mean(cm_prec) - @test macro_rec(cm) ≈ macro_rec(ŷ, y) ≈ mean(cm_rec) - @test macro_f1score(cm) ≈ macro_f1score(ŷ, y) ≈ mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec )) - - #`micro_avg` and `LittleDict` - micro_prec = MulticlassPrecision(average=micro_avg) - micro_rec = MulticlassRecall(average=micro_avg) - - @test micro_prec(cm) == micro_prec(ŷ, y) == sum(cm_tp) ./ 
sum(cm_fp.+cm_tp) - @test micro_rec(cm) == micro_rec(ŷ, y) == sum(cm_tp) ./ sum(cm_fn.+cm_tp) - @test micro_f1score(cm) == micro_f1score(ŷ, y) == - 2 ./ ( 1 ./ ( sum(cm_tp) ./ sum(cm_fp.+cm_tp) ) + 1 ./ ( sum(cm_tp) ./ sum(cm_fn.+cm_tp) ) ) - - #`no_avg` and `Vector` with class weights - vec_precision = MulticlassPrecision(return_type=Vector) - vec_recall = MulticlassRecall(return_type=Vector) - vec_f1score = MulticlassFScore(return_type=Vector) - - @test vec_precision(cm, class_w) ≈ vec_precision(ŷ, y, class_w) ≈ - mean(cm_prec .* [0; 1; 2]) - @test vec_recall(cm, class_w) ≈ vec_recall(ŷ, y, class_w) ≈ - mean(cm_rec .* [0; 1; 2]) - @test vec_f1score(cm, class_w) ≈ vec_f1score(ŷ, y, class_w) ≈ - mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0; 1; 2]) - - #`macro_avg` and `Vector` - v_ma_prec = MulticlassPrecision(average=macro_avg, - return_type=Vector) - v_ma_rec = MulticlassRecall(average=macro_avg, return_type=Vector) - v_ma_f1 = MulticlassFScore(average=macro_avg, return_type=Vector) - - @test v_ma_prec(cm) ≈ v_ma_prec(ŷ, y) ≈ mean(cm_prec) - @test v_ma_rec(cm) ≈ v_ma_rec(ŷ, y) ≈ mean(cm_rec) - @test v_ma_f1(cm) ≈ v_ma_f1(ŷ, y) ≈ mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec )) - - #`macro_avg` and `Vector` with class weights - @test v_ma_prec(cm, class_w) ≈ v_ma_prec(ŷ, y, class_w) ≈ - mean(cm_prec .* [0, 1, 2]) - @test v_ma_rec(cm, class_w) ≈ v_ma_rec(ŷ, y, class_w) ≈ - mean(cm_rec .* [0, 1, 2]) - @test v_ma_f1(cm, class_w) ≈ v_ma_f1(ŷ, y, class_w) ≈ - mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0, 1, 2]) - - #`micro_avg` and `Vector` - v_mi_prec = MulticlassPrecision(average=micro_avg, return_type=Vector) - v_mi_rec = MulticlassRecall(average=micro_avg, return_type=Vector) - v_mi_f1 = MulticlassFScore(average=micro_avg, return_type=Vector) - - @test v_mi_prec(cm) == v_mi_prec(ŷ, y) == sum(cm_tp) ./ sum(cm_fp.+cm_tp) - @test v_mi_rec(cm) == v_mi_rec(ŷ, y) == sum(cm_tp) ./ sum(cm_fn.+cm_tp) - @test v_mi_f1(cm) == v_mi_f1(ŷ, y) == - 2 ./ ( 1 ./ ( sum(cm_tp) ./ 
sum(cm_fp.+cm_tp) ) + 1 ./ ( sum(cm_tp) ./ sum(cm_fn.+cm_tp) ) ) -end - -@testset "issue #630" begin - # multiclass fscore corner case of absent class - - y = coerce([1, 2, 2, 2, 3], OrderedFactor)[1:4] - # [1, 2, 2, 2] # but 3 is in the pool - yhat = reverse(y) - # [2, 2, 2, 1] - - # In this case, assigning "3" as "positive" gives all true negative, - # and so NaN for that class's contribution to the average F1Score, - # which should accordingly be skipped. - - # postive class | TP | FP | FN | score for that class - # --------------|----|----|----|--------------------- - # 1 | 0 | 1 | 2 | 0 - # 2 | 2 | 1 | 1 | 2/3 - # 3 | 0 | 0 | 0 | NaN - - # mean score with skippin NaN is 1/3 - @test MulticlassFScore()(yhat, y) ≈ 1/3 -end - -@testset "Metadata binary" begin - for m in (accuracy, recall, Precision(), f1score, specificity) - e = info(m) - m == accuracy && (@test e.name == "Accuracy") - m == recall && (@test e.name == "TruePositiveRate") - m isa Precision && (@test e.name == "Precision") - m == f1score && (@test e.name == "FScore") - m == specificity && (@test e.name == "TrueNegativeRate") - @test e.target_scitype <: AbstractArray{<:Union{Missing,Finite}} - @test e.prediction_type == :deterministic - @test e.orientation == :score - @test e.reports_each_observation == false - @test e.is_feature_dependent == false - if m == accuracy - @test e.supports_weights - else - @test !e.supports_weights - end - end - e = info(auc) - @test e.name == "AreaUnderCurve" - @test e.target_scitype == - Union{AbstractArray{<:Union{Missing,Multiclass{2}}}, - AbstractArray{<:Union{Missing,OrderedFactor{2}}}} - @test e.prediction_type == :probabilistic - @test e.reports_each_observation == false - @test e.is_feature_dependent == false - @test e.supports_weights == false -end - -@testset "Metadata multiclass" begin - for m in (MulticlassRecall(), MulticlassPrecision(), - MulticlassFScore(), MulticlassTrueNegativeRate()) - e = info(m) - m isa MulticlassRecall && - (@test e.name == 
"MulticlassTruePositiveRate") - m isa MulticlassPrecision && - (@test e.name == "MulticlassPrecision") - m isa MulticlassFScore && - (@test e.name == "MulticlassFScore") - m isa MulticlassTrueNegativeRate && - (@test e.name == "MulticlassTrueNegativeRate") - @test e.target_scitype <: AbstractArray{<:Union{Missing,Finite}} - @test e.prediction_type == :deterministic - @test e.orientation == :score - @test e.reports_each_observation == false - @test e.is_feature_dependent == false - @test e.supports_weights == false - @test e.supports_class_weights == true - end -end - -@testset "More binary metrics" begin - y = coerce([missing, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, - 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, - 2, 2, 2, 1], Union{Missing,OrderedFactor}) - ŷ = coerce([1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, - 1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, - 1, 2, 2, missing], Union{Missing,OrderedFactor}) - - # check all constructors - m = TruePositive() - @test m(ŷ, y) == truepositive(ŷ, y) - m = TruePositive(rev=true) - @test m(ŷ, y) == truenegative(ŷ, y) - m = TrueNegative() - @test m(ŷ, y) == truenegative(ŷ, y) - m = FalsePositive() - @test m(ŷ, y) == falsepositive(ŷ, y) - m = FalseNegative() - @test m(ŷ, y) == falsenegative(ŷ, y) - m = TruePositiveRate() - @test m(ŷ, y) == tpr(ŷ, y) == truepositive_rate(ŷ, y) - m = TrueNegativeRate() - @test m(ŷ, y) == tnr(ŷ, y) == truenegative_rate(ŷ, y) - m = FalsePositiveRate() - @test m(ŷ, y) == fpr(ŷ, y) == falsepositive_rate(ŷ, y) - m = FalseNegativeRate() - @test m(ŷ, y) == fnr(ŷ, y) == falsenegative_rate(ŷ, y) - m = FalseDiscoveryRate() - @test m(ŷ, y) == fdr(ŷ, y) == falsediscovery_rate(ŷ, y) - m = Precision() - @test m(ŷ, y) == precision(ŷ, y) - m = NPV() - @test m(ŷ, y) == npv(ŷ, y) - m = FScore() - @test m(ŷ, y) == f1score(ŷ, y) - # check synonyms - m = TPR() - @test m(ŷ, y) == tpr(ŷ, y) - m = TNR() - @test m(ŷ, y) == tnr(ŷ, y) - m = FPR() - @test m(ŷ, y) == fpr(ŷ, y) == fallout(ŷ, y) - m = FNR() - @test m(ŷ, y) == fnr(ŷ, y) 
== miss_rate(ŷ, y) - m = FDR() - @test m(ŷ, y) == fdr(ŷ, y) - m = PPV() - @test m(ŷ, y) == precision(ŷ, y) == ppv(ŷ, y) - m = Recall() - @test m(ŷ, y) == tpr(ŷ, y) == recall(ŷ, y) == - sensitivity(ŷ, y) == hit_rate(ŷ, y) - m = Specificity() - @test m(ŷ, y) == tnr(ŷ, y) == specificity(ŷ, y) == selectivity(ŷ, y) - # 'higher order' - m = BACC() - @test m(ŷ, y) == bacc(ŷ, y) == (tpr(ŷ, y) + tnr(ŷ, y))/2 - - ### External comparisons - sk_prec = 0.6111111111111112 # m.precision_score(y, yhat, pos_label=2) - @test precision(ŷ, y) ≈ sk_prec - sk_rec = 0.6875 - @test recall(ŷ, y) == sk_rec # m.recall_score(y, yhat, pos_label=2) - sk_f05 = 0.625 - f05 = FScore(β=0.5) - @test f05(ŷ, y) ≈ sk_f05 # m.fbeta_score(y, yhat, 0.5, pos_label=2) - - # reversion mechanism - sk_prec_rev = 0.5454545454545454 - prec_rev = Precision(rev=true) - @test prec_rev(ŷ, y) ≈ sk_prec_rev - sk_rec_rev = 0.46153846153846156 - rec_rev = Recall(rev=true) - @test rec_rev(ŷ, y) ≈ sk_rec_rev -end - -@testset "More multiclass metrics" begin - y = coerce(categorical([missing, 1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 2, 1, 2, - 2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, - 2, 2, 2, 0]), Union{Missing,Multiclass}) - ŷ = coerce(categorical([0, 2, 0, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 2, - 1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 2, - 1, 2, 2, missing]), Union{Missing,Multiclass}) - w = Dict(0=>1, 1=>2, 2=>3) #class_w - # check all constructors - m = MulticlassTruePositive() - @test m(ŷ, y) == multiclass_truepositive(ŷ, y) - m = MulticlassTrueNegative() - @test m(ŷ, y) == multiclass_truenegative(ŷ, y) - m = MulticlassFalsePositive() - @test m(ŷ, y) == multiclass_falsepositive(ŷ, y) - m = MulticlassFalseNegative() - @test m(ŷ, y) == multiclass_falsenegative(ŷ, y) - m = MulticlassTruePositiveRate() - @test m(ŷ, y) == multiclass_tpr(ŷ, y) == - multiclass_truepositive_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) == - multiclass_truepositive_rate(ŷ, y, w) - m = MulticlassTrueNegativeRate() - @test m(ŷ, y) == multiclass_tnr(ŷ, y) 
== - multiclass_truenegative_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) == - multiclass_truenegative_rate(ŷ, y, w) - m = MulticlassFalsePositiveRate() - @test m(ŷ, y) == multiclass_fpr(ŷ, y) == - multiclass_falsepositive_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fpr(ŷ, y, w) == - multiclass_falsepositive_rate(ŷ, y, w) - m = MulticlassFalseNegativeRate() - @test m(ŷ, y) == multiclass_fnr(ŷ, y) == - multiclass_falsenegative_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fnr(ŷ, y, w) == - multiclass_falsenegative_rate(ŷ, y, w) - m = MulticlassFalseDiscoveryRate() - @test m(ŷ, y) == multiclass_fdr(ŷ, y) == - multiclass_falsediscovery_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fdr(ŷ, y, w) == - multiclass_falsediscovery_rate(ŷ, y, w) - m = MulticlassPrecision() - @test m(ŷ, y) == multiclass_precision(ŷ, y) - @test m(ŷ, y, w) == multiclass_precision(ŷ, y, w) - m = MulticlassNegativePredictiveValue() - @test m(ŷ, y) == multiclass_npv(ŷ, y) - @test m(ŷ, y, w) == multiclass_npv(ŷ, y, w) - m = MulticlassFScore() - @test m(ŷ, y) == macro_f1score(ŷ, y) - @test m(ŷ, y, w) == macro_f1score(ŷ, y, w) - # check synonyms - m = MTPR(return_type=Vector) - @test m(ŷ, y) == multiclass_tpr(ŷ, y) - @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) - m = MTNR(return_type=Vector) - @test m(ŷ, y) == multiclass_tnr(ŷ, y) - @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) - m = MFPR() - @test m(ŷ, y) == multiclass_fpr(ŷ, y) == multiclass_fallout(ŷ, y) - @test m(ŷ, y, w) == multiclass_fpr(ŷ, y, w) == - multiclass_fallout(ŷ, y, w) - m = MFNR() - @test m(ŷ, y) == multiclass_fnr(ŷ, y) == - multiclass_miss_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_fnr(ŷ, y, w) == - multiclass_miss_rate(ŷ, y, w) - m = MFDR() - @test m(ŷ, y) == multiclass_fdr(ŷ, y) - @test m(ŷ, y, w) == multiclass_fdr(ŷ, y, w) - m = MPPV() - @test m(ŷ, y) == MulticlassPrecision()(ŷ, y) == - multiclass_ppv(ŷ, y) - @test m(ŷ, y, w) == MulticlassPrecision()(ŷ, y, w) == - multiclass_ppv(ŷ, y, w) - m = MulticlassRecall() - @test m(ŷ, 
y) == multiclass_tpr(ŷ, y) - @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) - @test m(ŷ, y) == multiclass_sensitivity(ŷ, y) == - multiclass_hit_rate(ŷ, y) - @test m(ŷ, y, w) == multiclass_sensitivity(ŷ, y, w) == - multiclass_hit_rate(ŷ, y, w) - m = MulticlassSpecificity() - @test m(ŷ, y) == multiclass_tnr(ŷ, y) == multiclass_specificity(ŷ, y) == - multiclass_selectivity(ŷ, y) - @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) == - multiclass_specificity(ŷ, y, w) == multiclass_selectivity(ŷ, y, w) -end - - -@testset "Additional multiclass tests" begin - table = reshape(collect("aabbbccccddbabccbacccd"), 11, 2) - table = coerce(table, Multiclass); - yhat = table[:,1] # ['a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd'] - y = table[:,2] # ['b', 'a', 'b', 'c', 'c', 'b', 'a', 'c', 'c', 'c', 'd'] - class_w = Dict('a'=>7, 'b'=>5, 'c'=>2, 'd'=> 0) - - # class | TP | FP | TP + FP | precision | FN | TP + FN | recall - # ------|----|----|------------------------------------|------- - # a | 1 | 1 | 2 | 1/2 | 1 | 2 | 1/2 - # b | 1 | 2 | 3 | 1/3 | 2 | 3 | 1/3 - # c | 2 | 2 | 4 | 1/2 | 3 | 5 | 2/5 - # d | 1 | 1 | 2 | 1/2 | 0 | 1 | 1 - - # helper: - inverse(x) = 1/x - harmonic_mean(x, y; beta=1.0) = - (1 + inverse(beta^2))*inverse(mean(inverse(beta^2*x)+ inverse(y))) - - # precision: - p_macro = mean([1/2, 1/3, 1/2, 1/2]) - @test MulticlassPrecision()(yhat, y) ≈ p_macro - p_macro_w = mean([7/2, 5/3, 2/2, 0/2]) - @test MulticlassPrecision()(yhat, y, class_w) ≈ p_macro_w - @test p_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassPrecision(average=micro_avg)(yhat, y, class_w)) - p_micro = (1 + 1 + 2 + 1)/(2 + 3 + 4 + 2) - @test MulticlassPrecision(average=micro_avg)(yhat, y) ≈ p_micro - - # recall: - r_macro = mean([1/2, 1/3, 2/5, 1]) - @test MulticlassRecall(average=macro_avg)(yhat, y) ≈ r_macro - r_macro_w = mean([7/2, 5/3, 4/5, 0/1]) - @test MulticlassRecall(average=macro_avg)(yhat, y, class_w) ≈ r_macro_w - @test r_macro_w ≈ - @test_logs((:warn, r"Using macro"), - 
MulticlassRecall(average=micro_avg)(yhat, y, class_w)) - r_micro = (1 + 1 + 2 + 1)/(2 + 3 + 5 + 1) - @test MulticlassPrecision(average=micro_avg)(yhat, y) ≈ r_micro - - # fscore: - harm_means = [harmonic_mean(1/2, 1/2), - harmonic_mean(1/3, 1/3), - harmonic_mean(1/2, 2/5), - harmonic_mean(1/2, 1)] - f1_macro = mean(harm_means) - @test MulticlassFScore(average=macro_avg)(yhat, y) ≈ f1_macro - @test MulticlassFScore(average=no_avg, - return_type=Vector)(yhat, y, class_w) ≈ - [7, 5, 2, 0] .* harm_means - f1_macro_w = mean([7, 5, 2, 0] .* harm_means) - @test MulticlassFScore(average=macro_avg)(yhat, y, class_w) ≈ f1_macro_w - @test f1_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassFScore(average=micro_avg)(yhat, y, class_w)) - f1_micro = harmonic_mean(p_micro, r_micro) - @test MulticlassFScore(average=micro_avg)(yhat, y) ≈ f1_micro - - # fscore, β=1/3: - harm_means = [harmonic_mean(1/2, 1/2, beta=1/3), - harmonic_mean(1/3, 1/3, beta=1/3), - harmonic_mean(1/2, 2/5, beta=1/3), - harmonic_mean(1/2, 1, beta=1/3)] - f1_macro = mean(harm_means) - @test MulticlassFScore(β=1/3, average=macro_avg)(yhat, y) ≈ f1_macro - @test MulticlassFScore(β=1/3, - average=no_avg, - return_type=Vector)(yhat, y, class_w) ≈ - [7, 5, 2, 0] .* harm_means - f1_macro_w = mean([7, 5, 2, 0] .* harm_means) - @test MulticlassFScore(β=1/3, - average=macro_avg)(yhat, y, class_w) ≈ f1_macro_w - @test f1_macro_w ≈ - @test_logs((:warn, r"Using macro"), - MulticlassFScore(β=1/3, - average=micro_avg)(yhat, y, class_w)) - f1_micro = harmonic_mean(p_micro, r_micro, beta=1/3) - @test MulticlassFScore(β=1/3, average=micro_avg)(yhat, y) ≈ f1_micro -end - -@testset "docstrings coverage" begin - @test startswith(info(BrierScore()).docstring, "`BrierScore`") -end diff --git a/test/measures/loss_functions_interface.jl b/test/measures/loss_functions_interface.jl deleted file mode 100644 index 8c59945b..00000000 --- a/test/measures/loss_functions_interface.jl +++ /dev/null @@ -1,68 +0,0 @@ -rng = 
StableRNG(614) - -# convert a Binary vector into vector of +1 or -1 values -# (for testing only): -pm1(y) = Int8(2) .* (Int8.(MLJBase.int(y))) .- Int8(3) - -const MARGIN_LOSSES = MLJBase.MARGIN_LOSSES -const DISTANCE_LOSSES = MLJBase.DISTANCE_LOSSES - -# using `WeightedSum` instead of `WeightedMean`; see -# https://github.com/JuliaML/LossFunctions.jl/issues/149 -WeightedSum(w) = LossFunctions.AggMode.WeightedMean(w, normalize=false) - -@testset "naked" begin - @test MLJBase.naked(MLJBase.LossFunctions.PeriodicLoss{Float64}) == - :PeriodicLoss - @test MLJBase.naked(MLJBase.LossFunctions.PeriodicLoss) == - :PeriodicLoss -end - -@testset "LossFunctions.jl - binary" begin - y = categorical(["yes", "yes", "no", "yes"]) - yes, no = y[1], y[3] - dyes = MLJBase.UnivariateFinite([yes, no], [0.6, 0.4]) - dno = MLJBase.UnivariateFinite([yes, no], [0.3, 0.7]) - yhat = [dno, dno, dyes, dyes] - w = [1, 2, 3, 4] - - @test MLJBase.ZeroOneLoss()(yhat, y) ≈ [1, 1, 1, 0] - @test MLJBase.zero_one_loss(yhat,y, w) ≈ [1, 2, 3, 0] - - N = 10 - y = categorical(rand(rng, ["yes", "no"], N), ordered=true) - levels!(y, ["no", "yes"]) - no, yes = MLJBase.classes(y[1]) - @test pm1([yes, no]) in [[+1, -1], [-1, +1]] - ym = pm1(y) # observations for raw LossFunctions measure - p_vec = rand(N) - yhat = MLJBase.UnivariateFinite([no, yes], p_vec, augment=true) - yhatm = MLJBase._scale.(p_vec) # predictions for raw LossFunctions measure - w = rand(rng, N) - - for M_ex in MARGIN_LOSSES - m = eval(:(MLJBase.$M_ex())) - @test m(yhat, y) ≈ (getfield(m, :loss)).(yhatm, ym) - @test m(yhat, y, w) ≈ - w .* (getfield(m, :loss)).(yhatm, ym) - end -end - -@testset "LossFunctions.jl - continuous" begin - # losses for continuous targets: - N = 10 - y = randn(rng, N) - yhat = randn(rng, N) - X = nothing - w = rand(rng, N) - - for M_ex in DISTANCE_LOSSES - m = eval(:(MLJBase.$M_ex())) - m_ex = MLJBase.snakecase(M_ex) - @test m == eval(:(MLJBase.$m_ex)) - @test m(yhat, y) ≈ - (getfield(m, :loss)).(yhat, y) - @test 
m(yhat ,y, w) ≈ - w .* (getfield(m, :loss)).(yhat, y) - end -end diff --git a/test/measures/measure_search.jl b/test/measures/measure_search.jl deleted file mode 100644 index f8aa5e4d..00000000 --- a/test/measures/measure_search.jl +++ /dev/null @@ -1,42 +0,0 @@ -ms = map(measures()) do m - m.name -end -@test "LogLoss" in ms -@test "RootMeanSquaredError" in ms - -# test `M()` makes sense for all measure types `M` extracted from `name`, -@test all(Symbol.(ms)) do ex - try - eval(:($ex())) - true - catch - false - end -end - -S = AbstractVector{Union{Missing,Multiclass{3}}} -task(m) = S <: m.target_scitype - -ms = map(measures(task)) do m - m.name -end - -@test "LogLoss" in ms -@test !("RootMeanSquaredError" in ms) - -task(m) = AbstractVector{Continuous} <: m.target_scitype - -ms = map(measures(task)) do m - m.name -end - -@test !("Accuracy" in ms) -@test "RootMeanSquaredError" in ms - -ms = map(measures("Brier")) do m - m.name -end - -@test Set(ms) == Set(["BrierLoss", "BrierScore"]) - -true diff --git a/test/measures/measures.jl b/test/measures/measures.jl deleted file mode 100644 index 602c3e78..00000000 --- a/test/measures/measures.jl +++ /dev/null @@ -1,134 +0,0 @@ -module TestMeasures - -using MLJBase, Test -import Distributions -using CategoricalArrays -using Statistics -import LossFunctions -using StableRNGs -using OrderedCollections: LittleDict - -rng = StableRNGs.StableRNG(123) - -@testset "aggregation" begin - v = rand(5) - @test aggregate(v, mae) ≈ mean(v) - @test aggregate(v, TruePositive()) ≈ sum(v) - @test aggregate(v, rms) ≈ sqrt(mean(v.^2)) - λ = rand() - @test aggregate(λ, rms) === λ - @test aggregate(aggregate(v, l2), l2) == aggregate(v, l2) - m = LittleDict([0, 1, 2, 3, 4], v) - @test aggregate(m, MTPR()) == mean(v) -end - -@testset "metadata" begin - measures() - measures(m -> m.target_scitype <: AbstractVector{<:Finite} && - m.supports_weights) - info(rms) - @test true -end - -@testset "coverage" begin - # just checking that the traits work not 
that they're correct - @test orientation(BrierScore()) == :score - @test orientation(auc) == :score - @test orientation(rms) == :loss - - @test reports_each_observation(auc) == false - @test is_feature_dependent(auc) == false - - @test MLJBase.distribution_type(auc) == MLJBase.UnivariateFinite -end - -@testset "MLJBase.value" begin - yhat = randn(rng,5) - X = (weight=randn(rng,5), x1 = randn(rng,5)) - y = randn(rng,5) - w = randn(rng,5) - - @test MLJBase.value(mae, yhat, nothing, y, nothing) ≈ mae(yhat, y) - @test MLJBase.value(mae, yhat, nothing, y, w) ≈ mae(yhat, y, w) - - spooky(yhat, y) = abs.(yhat - y) |> mean - @test MLJBase.value(spooky, yhat, nothing, y, nothing) ≈ mae(yhat, y) - - cool(yhat, y, w) = abs.(yhat - y) .* w |> mean - MLJBase.supports_weights(::Type{typeof(cool)}) = true - @test MLJBase.value(cool, yhat, nothing, y, w) ≈ mae(yhat, y, w) - - funky(yhat, X, y) = X.weight .* abs.(yhat - y) |> mean - MLJBase.is_feature_dependent(::Type{typeof(funky)}) = true - @test MLJBase.value(funky, yhat, X, y, nothing) ≈ mae(yhat, y, X.weight) - - weird(yhat, X, y, w) = w .* X.weight .* abs.(yhat - y) |> mean - MLJBase.is_feature_dependent(::Type{typeof(weird)}) = true - MLJBase.supports_weights(::Type{typeof(weird)}) = true - @test MLJBase.value(weird, yhat, X, y, w) ≈ mae(yhat, y, X.weight .* w) -end - -mutable struct DRegressor <: Deterministic end -MLJBase.target_scitype(::Type{<:DRegressor}) = - AbstractVector{<:Continuous} - -mutable struct D2Regressor <: Deterministic end -MLJBase.target_scitype(::Type{<:D2Regressor}) = - AbstractVector{Continuous} - -mutable struct DClassifier <: Deterministic end -MLJBase.target_scitype(::Type{<:DClassifier}) = - AbstractVector{<:Finite} - -mutable struct PClassifier <: Probabilistic end -MLJBase.target_scitype(::Type{<:PClassifier}) = - AbstractVector{<:Finite} - -mutable struct PRegressor <: Probabilistic end -MLJBase.target_scitype(::Type{<:PRegressor}) = - AbstractVector{<:Continuous} - -mutable struct 
PCountRegressor <: Probabilistic end -MLJBase.target_scitype(::Type{<:PCountRegressor}) = - AbstractVector{<:Count} - -@testset "default_measure" begin - @test MLJBase.default_measure(DRegressor()) == rms - @test MLJBase.default_measure(D2Regressor()) == rms - @test MLJBase.default_measure(DClassifier()) == misclassification_rate - @test MLJBase.default_measure(PClassifier()) == log_loss - - @test MLJBase.default_measure(DRegressor) == rms - @test MLJBase.default_measure(D2Regressor) == rms - @test MLJBase.default_measure(DClassifier) == misclassification_rate - @test MLJBase.default_measure(PClassifier) == log_loss - - @test MLJBase.default_measure(PRegressor) == log_loss - @test MLJBase.default_measure(PCountRegressor) == log_loss -end - -include("confusion_matrix.jl") -include("roc.jl") -include("continuous.jl") -include("finite.jl") -include("probabilistic.jl") -include("loss_functions_interface.jl") - -@testset "show method for measures" begin - io = IOBuffer() - for meta in measures() - m = eval(Meta.parse("$(meta.name)()")) - show(io, MIME("text/plain"), m) - show(io, m) - end -end - -@testset "missing and NaN values in aggregation" begin - v =[1, 2, missing, 5, NaN] - @test MLJBase.Sum()(v) == 8 - @test MLJBase.RootMeanSquare()(v) ≈ sqrt((1 + 4 + 25)/3) - @test MLJBase.Mean()(Union{Missing,Float32}[]) |> isnan -end - -end -true diff --git a/test/measures/probabilistic.jl b/test/measures/probabilistic.jl deleted file mode 100644 index 733c0d20..00000000 --- a/test/measures/probabilistic.jl +++ /dev/null @@ -1,174 +0,0 @@ -rng = StableRNG(51803) -using LinearAlgebra - -const Vec = AbstractVector - -@testset "AUC" begin - # this is random binary and random scores generated with numpy - # then using roc_auc_score from sklearn to get the AUC - # we check that we recover a comparable AUC and that it's invariant - # to ordering. 
- c = ["neg", "pos"] - y = categorical(c[[0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, - 1, 0] .+ 1]) - probs = [ - 0.90237535, 0.41276349, 0.94511611, 0.08390761, 0.55847392, - 0.26043136, 0.78565351, 0.20133953, 0.7404382 , 0.15307601, - 0.59596716, 0.8169512 , 0.88200483, 0.23321489, 0.94050483, - 0.27593662, 0.60702176, 0.36427036, 0.35481784, 0.06416543, - 0.45576954, 0.12354048, 0.79830435, 0.15799818, 0.20981099, - 0.43451663, 0.24020098, 0.11401055, 0.25785748, 0.86490263, - 0.75715379, 0.06550534, 0.12628999, 0.18878245, 0.1283757 , - 0.76542903, 0.8780248 , 0.86891113, 0.24835709, 0.06528076, - 0.72061354, 0.89451634, 0.95634394, 0.07555979, 0.16345437, - 0.43498831, 0.37774708, 0.31608861, 0.41369339, 0.95691113] - - ŷ = UnivariateFinite(y[1:2], probs, augment=true) - # ŷ = [UnivariateFinite(y[1:2], [1.0 - p, p]) for p in [ - # 0.90237535, 0.41276349, 0.94511611, 0.08390761, 0.55847392, - # 0.26043136, 0.78565351, 0.20133953, 0.7404382 , 0.15307601, - # 0.59596716, 0.8169512 , 0.88200483, 0.23321489, 0.94050483, - # 0.27593662, 0.60702176, 0.36427036, 0.35481784, 0.06416543, - # 0.45576954, 0.12354048, 0.79830435, 0.15799818, 0.20981099, - # 0.43451663, 0.24020098, 0.11401055, 0.25785748, 0.86490263, - # 0.75715379, 0.06550534, 0.12628999, 0.18878245, 0.1283757 , - # 0.76542903, 0.8780248 , 0.86891113, 0.24835709, 0.06528076, - # 0.72061354, 0.89451634, 0.95634394, 0.07555979, 0.16345437, - # 0.43498831, 0.37774708, 0.31608861, 0.41369339, 0.95691113]] - @test isapprox(auc(ŷ, y), 0.455716, rtol=1e-4) - ŷ_unwrapped = [ŷ...] 
- @test isapprox(auc(ŷ_unwrapped, y), 0.455716, rtol=1e-4) - - # reversing the roles of positive and negative should return very - # similar score - y2 = deepcopy(y); - levels!(y2, reverse(levels(y2))); - @test y == y2 - @test levels(y) != levels(y2) - ŷ2 = UnivariateFinite(y2[1:2], probs, augment=true) # same probs - @test isapprox(auc(ŷ2, y2), auc(ŷ, y), rtol=1e-4) - - # The auc algorithm should be able to handle the case where two or more - # samples in the prediction vector has the same UnivariateFinite distribution - # We check this by comparing our auc with that gotten from roc_auc_score from sklearn. - y = categorical(["class_1","class_1","class_0","class_0","class_1","class_1","class_0"]) - ŷ = UnivariateFinite(levels(y), [0.8,0.7,0.5,0.5,0.5,0.5,0.3], augment=true, pool=y) - # We can see that ŷ[3] ≈ ŷ[4] ≈ ŷ[5] ≈ ŷ[6] - @test isapprox(auc(ŷ, y), 0.8333333333333334, rtol=1e-16) -end - -@testset "Log, Brier, Spherical - finite case" begin - y = categorical(collect("abb")) - L = [y[1], y[2]] - d1 = UnivariateFinite(L, [0.1, 0.9]) # a - d2 = UnivariateFinite(L, Float32[0.4, 0.6]) # b - d3 = UnivariateFinite(L, [0.2, 0.8]) # b - yhat = [d1, d2, d3] - ym = vcat(y, [missing,]) - yhatm = vcat(yhat, [d3, ]) - - @test mean(log_loss(yhat, y)) ≈ - Float32(-(log(0.1) + log(0.6) + log(0.8))/3) - @test mean(skipmissing(log_loss(yhatm, ym))) ≈ - Float32(-(log(0.1) + log(0.6) + log(0.8))/3) - yhat = UnivariateFinite(L, [0.1 0.9; - 0.4 0.6; - 0.2 0.8]) - @test isapprox(mean(log_loss(yhat, y)), - -(log(0.1) + log(0.6) + log(0.8))/3, atol=eps(Float32)) - - @test log_score(yhat, y) ≈ -log_loss(yhat, y) - - # sklearn test - # >>> from sklearn.metrics import log_loss - # >>> log_loss(["spam", "ham", "ham", "spam","ham","ham"], - # [[.1, .9], [.9, .1], [.8, .2], [.35, .65], [0.2, 0.8], [0.3,0.7]]) - # 0.6130097025803921 - y2 = categorical(["spam", "ham", "ham", "spam", "ham", "ham"]) - L2 = classes(y2[1]) - probs = vcat([.1 .9], [.9 .1], [.8 .2], [.35 .65], [0.2 0.8], [0.3 0.7]) 
- yhat2 = UnivariateFinite(L2, probs) - y2m = vcat(y2, [missing,]) - yhat2m = UnivariateFinite(L2, vcat(probs, [0.1 0.9])) - @test mean(log_loss(yhat2, y2)) ≈ 0.6130097025803921 - @test mean(skipmissing(log_loss(yhat2, y2))) ≈ 0.6130097025803921 - - ## Brier - scores = BrierScore()(yhat, y) - @test size(scores) == size(y) - @test Float32.(scores) ≈ [-1.62, -0.32, -0.08] - scoresm = BrierScore()(yhatm, ym) - @test Float32.((scoresm)[1:3]) ≈ [-1.62, -0.32, -0.08] - @test ismissing(scoresm[end]) - # test specialized broadcasting on brierloss - @test BrierLoss()(yhat, y) == -BrierScore()(yhat, y) - # sklearn test - # >>> from sklearn.metrics import brier_score_loss - # >>> brier_score_loss([1, 0, 0, 1, 0, 0], [.9, .1, .2, .65, 0.8, 0.7]) - # 0.21875 NOTE: opposite orientation - @test -mean(BrierScore()(yhat2, y2)) / 2 ≈ 0.21875 - probs2 = [[.1, .9], [Float32(0.9), Float32(1) - Float32(0.9)], [.8, .2], - [.35, .65], [0.2, 0.8], [0.3, 0.7]] - yhat3 = [UnivariateFinite(L2, prob) for prob in probs2] - @test -mean(BrierScore()(yhat3, y2) / 2) ≈ 0.21875 - @test mean(BrierLoss()(yhat3, y2) / 2) ≈ -mean(BrierScore()(yhat3, y2) / 2) - - # Spherical - s = SphericalScore() # SphericalScore(2) - norms = [norm(probs[i,:]) for i in 1:size(probs, 1)] - @test (pdf.(yhat2, y2) ./ norms) ≈ s(yhat2, y2) - # non-performant version: - yhat4 = [yhat2...] 
- @test (pdf.(yhat2, y2) ./ norms) ≈ s(yhat4, y2) -end - -@testset "LogScore, BrierScore, SphericalScore - infinite case" begin - uniform = Distributions.Uniform(2, 5) - betaprime = Distributions.BetaPrime() - discrete_uniform = Distributions.DiscreteUniform(2, 5) - w = [2, 3] - - # brier - yhat = [missing, uniform] - @test isapprox(brier_score(yhat, [1.0, 1.0]) |> last, -1/3) - @test isapprox(brier_score(yhat, [NaN, 4.0]) |> last, 1/3) - @test isapprox(brier_score(yhat, [1.0, 1.0], w) |> last, -1) - yhat = [missing, uniform] - # issue https://github.com/JuliaStats/Distributions.jl/issues/1392 - @test_broken isapprox(brier_score(yhat, [missing, 4.0], w), [1,]) - yhat = [discrete_uniform, discrete_uniform] - @test isapprox(brier_score(yhat, [NaN, 1.0]), [-1/4, -1/4,]) - @test isapprox(brier_score(yhat, [4.0, 4.0]), [1/4, 1/4,]) - - # spherical - yhat = [uniform, uniform] - @test isapprox(spherical_score(yhat, [1.0, 1.0]), [0, 0]) - @test isapprox(spherical_score(yhat, [NaN, 4.0]), [0, 1/sqrt(3),]) - # issue https://github.com/JuliaStats/Distributions.jl/issues/1392 - @test_broken isapprox(spherical_score(yhat, [missing, 4.0], w), [sqrt(3),]) - @test isapprox(spherical_score(yhat, [4.0, 4.0], w), [2/sqrt(3), sqrt(3),]) - yhat = [discrete_uniform, discrete_uniform] - @test isapprox(spherical_score(yhat, [NaN, 1.0]), [0, 0]) - @test isapprox(spherical_score(yhat, [4.0, 4.0]), [1/2, 1/2]) - - # log - yhat = [uniform, uniform] - @test isapprox(log_score(yhat, [4.0, 4.0]), [-log(3), -log(3),]) - @test isapprox(log_score(yhat, [4.0, 4.0], w), [-2*log(27)/3, -log(27)]) - yhat = [discrete_uniform, discrete_uniform] - # issue https://github.com/JuliaStats/Distributions.jl/issues/1392 - @test_broken isapprox(log_score(yhat, [missing, 4.0]), [-log(4),]) - - log_score([missing, uniform], [4.0, 4.0]) - - # errors - @test_throws(MLJBase.err_l2_norm(brier_score), - brier_score([betaprime, betaprime], [1.0, 1.0])) - s = SphericalScore(alpha=1) - @test_throws 
MLJBase.ERR_UNSUPPORTED_ALPHA s(yhat, [1.0, 1.0]) -end - -true diff --git a/test/measures/roc.jl b/test/measures/roc.jl deleted file mode 100644 index aaaed8b7..00000000 --- a/test/measures/roc.jl +++ /dev/null @@ -1,13 +0,0 @@ -@testset "ROC" begin - y = [ 0 0 0 1 0 1 1 0] |> vec |> categorical - s = [0.0 0.1 0.1 0.1 0.2 0.2 0.5 0.5] |> vec - ŷ = UnivariateFinite([0, 1], s, augment=true, pool=y) - - fprs, tprs, ts = roc(ŷ, y) - - sk_fprs = [0. , 0.2, 0.4, 0.8, 1. ] - sk_tprs = [0. , 0.33333333, 0.66666667, 1., 1.] - - @test fprs ≈ sk_fprs - @test tprs ≈ sk_tprs -end diff --git a/test/operations.jl b/test/operations.jl index e14b7702..5970cb0f 100644 --- a/test/operations.jl +++ b/test/operations.jl @@ -57,7 +57,7 @@ using ..Models @test_throws ArgumentError transform(m, Tuple(y1), Tuple(y2)) end -@testset "operations on network-composite models" begin +@testset "operations on NetworkComposite models" begin X = MLJBase.table(rand(4, 4)) y = rand(4) m = fit!(machine(SimpleProbabilisticNetworkCompositeModel(), X, y), verbosity=0) @@ -67,21 +67,6 @@ end @test_throws ErrorException transform(m, X) end -# Test below to be removed after next breaking release -@testset "operations on composite/surrogate models" begin - X = MLJBase.table(rand(4, 4)) - y = rand(4) - m = fit!(machine(SimpleDeterministicCompositeModel(), X, y), verbosity=0) - @test predict(m, X) == m.fitresult.predict(X) - @test_throws ErrorException transform(m, X) - - m = fit!(machine(SimpleProbabilisticCompositeModel(), X, y), verbosity=0) - predictions = m.fitresult.predict(X) - @test predict(m, X) == predictions - @test predict_mode(m, X) == mode.(predictions) - @test_throws ErrorException transform(m, X) -end - end true diff --git a/test/preliminaries.jl b/test/preliminaries.jl index b806a840..bffc1f4e 100644 --- a/test/preliminaries.jl +++ b/test/preliminaries.jl @@ -12,12 +12,8 @@ using Distributed addprocs(; exeflags="--project=$(Base.active_project())") @info "nprocs() = $(nprocs())" -@static if 
VERSION >= v"1.3.0-DEV.573" - import .Threads - @info "nthreads() = $(Threads.nthreads())" -else - @info "Running julia $(VERSION). Multithreading tests excluded. " -end +import .Threads +@info "nthreads() = $(Threads.nthreads())" @everywhere begin using MLJModelInterface @@ -27,6 +23,7 @@ end using Logging using ComputationalResources using StableRNGs + using StatisticalMeasures end import TypedTables diff --git a/test/resampling.jl b/test/resampling.jl index c170039a..27850375 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -5,6 +5,9 @@ import ComputationalResources: CPU1, CPUProcesses, CPUThreads using .TestUtilities using ProgressMeter import Tables +@everywhere import StatisticalMeasures.StatisticalMeasuresBase as API +using StatisticalMeasures +import LearnAPI @everywhere begin using .Models @@ -25,13 +28,18 @@ struct DummyInterval <: Interval end dummy_interval=DummyInterval() dummy_measure_det(yhat, y) = 42 -MLJBase.target_scitype(::typeof(dummy_measure_det)) = Table(MLJBase.Textual) -MLJBase.prediction_type(::typeof(dummy_measure_det)) = :deterministic - -dummy_measure_interval(yhat, y) = [123, 456] -MLJBase.target_scitype(::typeof(dummy_measure_interval)) = - Table(MLJBase.Textual) -MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval +API.@trait( + typeof(dummy_measure_det), + observation_scitype = MLJBase.Textual, + kind_of_proxy = LearnAPI.LiteralTarget(), +) + +dummy_measure_interval(yhat, y) = 42 +API.@trait( + typeof(dummy_measure_interval), + observation_scitype = MLJBase.Textual, + kind_of_proxy = LearnAPI.ConfidenceInterval(), +) @testset "_actual_operations" begin clf = ConstantClassifier() @@ -49,7 +57,7 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval 1) == [predict_mean, predict_mean] - # handling of a measure with `:unknown` `prediction_type` (eg, + # handling of a measure with `nothing` `kind_of_proxy` (eg, # custom measure): my_mae(yhat, y) = abs.(yhat - y) @test( @@ -71,21 +79,29 @@ 
MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval [predict_mode]) @test MLJBase._actual_operations(nothing, [l2,], rgs, 1) == [predict_mean, ] - @test_throws(MLJBase.err_incompatible_prediction_types(clf_det, LogLoss()), - MLJBase._actual_operations(nothing, [LogLoss(),], clf_det, 1)) + @test_throws( + MLJBase.err_incompatible_prediction_types(clf_det, LogLoss()), + MLJBase._actual_operations(nothing, [LogLoss(),], clf_det, 1), + ) @test MLJBase._actual_operations(nothing, measures_det, clf_det, 1) == [predict, predict] - # measure/model differ in prediction type but weird target_scitype: + # measure/model differ in prediction type: @test_throws( MLJBase.err_ambiguous_operation(clf, dummy_measure_det), - MLJBase._actual_operations(nothing, [dummy_measure_det, ], clf, 1)) + MLJBase._actual_operations(nothing, [dummy_measure_det, ], clf, 1), + ) # measure has :interval prediction type but model does not (2 cases): @test_throws( MLJBase.err_ambiguous_operation(clf, dummy_measure_interval), - MLJBase._actual_operations(nothing, - [dummy_measure_interval, ], clf, 1)) + MLJBase._actual_operations( + nothing, + [dummy_measure_interval, ], + clf, + 1, + ), + ) @test_throws( MLJBase.err_ambiguous_operation(clf_det, dummy_measure_interval), MLJBase._actual_operations(nothing, @@ -103,16 +119,6 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval [LogLoss(), ], dummy_interval, 1)) end -@testset "_feature_dependencies_exist" begin - measures = Any[rms, rsq, log_loss, brier_score] - @test !MLJBase._feature_dependencies_exist(measures) - my_feature_dependent_loss(ŷ, X, y) = - sum(abs.(ŷ - y) .* X.penalty)/sum(X.penalty); - MLJBase.is_feature_dependent(::typeof(my_feature_dependent_loss)) = true - push!(measures, my_feature_dependent_loss) - @test MLJBase._feature_dependencies_exist(measures) -end - @testset_accelerated "dispatch of resources and progress meter" accel begin @info "Checking progress bars:" @@ -175,34 +181,50 @@ end y = 
rand(rng,4) # model prediction type is Probablistic but measure is Deterministic: - @test_throws(ArgumentError, - MLJBase._check_measure(rms, predict, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_PROBABILISTIC(rms, MLJBase.LOG_SUGGESTION2), + MLJBase._check_measure(rms, predict, model, y), + ) @test MLJBase._check_measure(rms, predict_mean, model, y) @test MLJBase._check_measure(rms, predict_median, model, y) - # has `y` `Finite` elscityp but measure `rms` is for `Continuous`: + # has `y` `Finite` elscitype but measure `rms` is for `Continuous`: y=categorical(collect("abc")) - @test_throws(ArgumentError, - MLJBase._check_measure(rms, predict_median, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_OBSERVATION_SCITYPE( + rms, + Union{Missing,Infinite}, + Multiclass{3}, + ), + MLJBase._check_measure(rms, predict_median, model, y), + ) model = ConstantClassifier() # model prediction type is Probablistic but measure is Deterministic: - @test_throws(ArgumentError, - MLJBase._check_measure(mcr, predict, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_PROBABILISTIC(mcr, MLJBase.LOG_SUGGESTION1), + MLJBase._check_measure(mcr, predict, model, y), + ) @test MLJBase._check_measure(mcr, predict_mode, model, y) # `Determistic` model but `Probablistic` measure: model = DeterministicConstantClassifier() - @test_throws(ArgumentError, - MLJBase._check_measure(cross_entropy, predict, model, y)) + @test_throws( + MLJBase.ERR_MEASURES_DETERMINISTIC(cross_entropy), + MLJBase._check_measure(cross_entropy, predict, model, y), + ) # measure with wrong target_scitype: - @test_throws(ArgumentError, - MLJBase._check_measures([brier_score, rms], - [predict_mode, predict_mean], - model, y)) + @test_throws( + MLJBase.ERR_MEASURES_DETERMINISTIC(brier_score), + MLJBase._check_measures( + [brier_score, rms], + [predict_mode, predict_mean], + model, y, + ), + ) model = ConstantClassifier() @test MLJBase._check_measures([brier_score, cross_entropy, accuracy], @@ -211,8 +233,6 @@ end end 
@testset "check weights" begin - @test_throws(MLJBase.ERR_WEIGHTS_REAL, - MLJBase._check_weights([:junk, :junk], 2)) @test_throws(MLJBase.ERR_WEIGHTS_LENGTH, MLJBase._check_weights([0.5, 0.5], 3)) @test MLJBase._check_weights([0.5, 0.5], 2) @@ -227,18 +247,18 @@ end @test MLJBase._check_class_weights(w, ['b', 'a']) end +@everywhere begin + user_rms(yhat, y) = mean((yhat -y).^2) |> sqrt + # deliberately omitting `consumes_multiple_observations` trait: + API.@trait typeof(user_rms) kind_of_proxy=LearnAPI.LiteralTarget() +end + @testset_accelerated "folds specified" accel begin x1 = ones(10) x2 = ones(10) X = (x1=x1, x2=x2) y = [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0] - my_rms(yhat, y) = sqrt(mean((yhat -y).^2)) - my_mae(yhat, y) = abs.(yhat - y) - MLJBase.reports_each_observation(::typeof(my_mae)) = true - MLJBase.prediction_type(::typeof(my_rms)) = :deterministic - MLJBase.prediction_type(::typeof(my_mae)) = :deterministic - resampling = [(3:10, 1:2), ([1, 2, 5, 6, 7, 8, 9, 10], 3:4), ([1, 2, 3, 4, 7, 8, 9, 10], 5:6), @@ -251,19 +271,27 @@ end mach = machine(model, X, y, cache=cache) # check detection of incompatible measure (cross_entropy): - @test_throws ArgumentError evaluate!(mach, resampling=resampling, - measure=[cross_entropy, rmslp1], - verbosity=verb, - acceleration=accel) + @test_throws( + MLJBase.err_incompatible_prediction_types(model, cross_entropy), + evaluate!( + mach, + resampling=resampling, + measure=[cross_entropy, rmslp1], + verbosity=verb, + acceleration=accel, + ), + ) result = evaluate!(mach, resampling=resampling, verbosity=verb, - measure=[my_rms, my_mae, rmslp1], acceleration=accel) + measure=[user_rms, mae, rmslp1], acceleration=accel) v = [1/2, 3/4, 1/2, 3/4, 1/2] @test result.per_fold[1] ≈ v @test result.per_fold[2] ≈ v @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5)) - @test ismissing(result.per_observation[1]) + @test result.per_observation[1] ≈ map(result.per_fold[1]) do μ + fill(μ, 2) + end @test 
result.per_observation[2][1] ≈ [1/2, 1/2] @test result.per_observation[2][2] ≈ [3/4, 3/4] @test result.measurement[1] ≈ mean(v) @@ -276,6 +304,42 @@ end end end +@testset "folds specified - per_observation=false" begin + accel = CPU1() + cache = true + x1 = ones(10) + x2 = ones(10) + X = (x1=x1, x2=x2) + y = [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0] + + resampling = [(3:10, 1:2), + ([1, 2, 5, 6, 7, 8, 9, 10], 3:4), + ([1, 2, 3, 4, 7, 8, 9, 10], 5:6), + ([1, 2, 3, 4, 5, 6, 9, 10], 7:8), + (1:8, 9:10)] + + model = DeterministicConstantRegressor() + mach = machine(model, X, y, cache=cache) + + result = evaluate!(mach, resampling=resampling, verbosity=verb, + measure=[user_rms, mae, rmslp1], acceleration=accel, + per_observation=false) + + v = [1/2, 3/4, 1/2, 3/4, 1/2] + + @test result.per_fold[1] ≈ v + @test result.per_fold[2] ≈ v + @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5)) + @test result.per_observation isa Vector{Missing} + @test result.measurement[1] ≈ mean(v) + @test result.measurement[2] ≈ mean(v) + + # fitted_params and report per fold: + @test map(fp->fp.fitresult, result.fitted_params_per_fold) ≈ + [1.5, 1.25, 1.5, 1.25, 1.5] + @test all(isnothing, result.report_per_fold) +end + @testset "repeated resampling" begin x1 = ones(20) x2 = ones(20) @@ -313,10 +377,11 @@ end model = Models.DeterministicConstantRegressor() for cache in [true, false] mach = machine(model, X, y, cache=cache) + # to see if a default measure is found: + evaluate!(mach, resampling=holdout, verbosity=verb, + acceleration=accel) result = evaluate!(mach, resampling=holdout, verbosity=verb, measure=[rms, rmslp1], acceleration=accel) - result = evaluate!(mach, resampling=holdout, verbosity=verb, - acceleration=accel) @test result.measurement[1] ≈ 2/3 # test direct evaluation of a model + data: @@ -454,7 +519,7 @@ end d for fold in folds]) end -@testset_accelerated "sample weights in evaluation" accel begin +@testset_accelerated "weights in evaluation" accel begin # cv: x1 = 
ones(4) x2 = ones(4) @@ -483,7 +548,7 @@ end X, y = make_blobs(rng=rng) cv=CV(nfolds = 2) fold1, fold2 = partition(eachindex(y), 0.5) - m = MLJBase.MulticlassFScore() + m = MulticlassFScore() class_w = Dict(1=>1, 2=>2, 3=>3) model = Models.DeterministicConstantClassifier() @@ -637,13 +702,6 @@ end measure=misclassification_rate, weights = fill(1, 100), acceleration=accel, verbosity=verb)) - - @test_throws(ArgumentError, - evaluate!(mach, resampling=Holdout(fraction_train=0.6), - operation=predict_mode, - measure=misclassification_rate, - weights = fill('a', 5), acceleration=accel, - verbosity=verb)) end # resampling on a subset of all rows: @@ -813,7 +871,7 @@ end operation=predict_mode, measure=ConfusionMatrix(), resampling=CV(), - ) + ); printed_evaluations = sprint(show, "text/plain", evaluations) @test contains(printed_evaluations, "N/A") end diff --git a/test/runtests.jl b/test/runtests.jl index 8b07929e..0c5593af 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,6 +18,7 @@ include("preliminaries.jl") @conditional_testset "misc" begin @test include("utilities.jl") @test include("static.jl") + @test include("show.jl") end @conditional_testset "interface" begin @@ -25,10 +26,8 @@ end @test include("interface/data_utils.jl") end -@conditional_testset "measures" begin - @test include("measures/measures.jl") - @test include("measures/measure_search.jl") - @test include("measures/doc_strings.jl") +@conditional_testset "default_measures" begin + @test include("default_measures.jl") end @conditional_testset "resampling" begin @@ -57,14 +56,11 @@ end @test include("composition/learning_networks/nodes.jl") @test include("composition/learning_networks/inspection.jl") @test include("composition/learning_networks/signatures.jl") - @test include("composition/learning_networks/deprecated_machines.jl") @test include("composition/learning_networks/replace.jl") end @conditional_testset "composition_models" begin @test include("composition/models/network_composite.jl") 
- @test include("composition/models/deprecated_methods.jl") - @test include("composition/models/deprecated_from_network.jl") @test include("composition/models/inspection.jl") @test include("composition/models/pipelines.jl") @test include("composition/models/transformed_target_model.jl") diff --git a/test/show.jl b/test/show.jl new file mode 100644 index 00000000..44aff52c --- /dev/null +++ b/test/show.jl @@ -0,0 +1,14 @@ +using .Models + +@testset "display of models" begin + io = IOBuffer() + show(io, KNNRegressor()) + @test String(take!(io)) == "KNNRegressor(K = 5, …)" + show(io, MIME("text/plain"), KNNRegressor()) + @test String(take!(io)) == + "KNNRegressor(\n K = 5, \n algorithm = :kdtree, \n "* + "metric = Distances.Euclidean(0.0), \n leafsize = 10, \n "* + "reorder = true, \n weights = :uniform)" +end + +true diff --git a/test/utilities.jl b/test/utilities.jl index f9e40580..5356ce66 100644 --- a/test/utilities.jl +++ b/test/utilities.jl @@ -171,5 +171,50 @@ end "sin, cos, tan, ..." 
end +@testset "observation" begin + @test MLJBase.observation(AbstractVector{Count}) == + Count + @test MLJBase.observation(AbstractVector{<:Count}) == + Count + @test MLJBase.observation(AbstractVector{<:Union{Missing,Count}}) == + Union{Missing,Count} + @test MLJBase.observation(AbstractMatrix{<:Count}) == + AbstractVector{<:Count} + @test MLJBase.observation(AbstractMatrix{Union{Missing,Count}}) == + AbstractVector{Union{Missing,Count}} + @test MLJBase.observation(AbstractMatrix{<:Union{Missing,Count}}) == + AbstractVector{<:Union{Missing,Count}} + @test MLJBase.observation(Table(Count)) == AbstractVector{<:Count} +end + +@testset "guess_observation_scitype" begin + @test MLJBase.guess_observation_scitype([missing, 1, 2, 3]) == + Union{Missing, Count} + @test MLJBase.guess_observation_scitype(rand(3, 2)) == + AbstractVector{Continuous} + @test MLJBase.guess_observation_scitype((x=rand(3), y=rand(Bool, 3))) == + AbstractVector{Union{Continuous, Count}} + @test MLJBase.guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3])) == + Unknown + @test MLJBase.guess_observation_scitype(5) == Unknown +end + +mutable struct DRegressor2 <: Deterministic end +MLJBase.target_scitype(::Type{<:DRegressor2}) = + AbstractVector{<:Continuous} + +@test MLJBase.guess_model_target_observation_scitype(DRegressor2()) == Continuous + +@testset "pretty" begin + X = (x=fill(1, 3), y=fill(2, 3)) + io = IOBuffer() + pretty(X) + pretty(io, X) + str = take!(io) |> String + @test contains(str, "x") + @test contains(str, "y") + @test contains(str, "│") +end + end # module true