diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ac1e885e..31959ec6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,7 +4,7 @@ on:
     branches:
       - master
       - dev
-      - for-a-0-point-21-release
+      - for-a-0-point-22-release
       - next-breaking-release
   push:
     branches:
diff --git a/Project.toml b/Project.toml
index cfb0d85f..dc7cd027 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MLJBase"
 uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.21.14"
+version = "1.0.0"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
@@ -13,8 +13,8 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
+LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
 MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
 Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
@@ -22,13 +22,22 @@ Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"
+StatisticalMeasuresBase = "c062fc1d-0d66-479b-b6ac-8b44719de4cc"
 StatisticalTraits = "64bff920-2084-43da-a3e6-9bb72801c0c9"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
+[weakdeps]
+StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"
+
+[extensions]
+DefaultMeasuresExt = "StatisticalMeasures"
+
 [compat]
 CategoricalArrays = "0.9, 0.10"
 CategoricalDistributions = "0.1"
@@ -36,14 +45,17 @@ ComputationalResources = "0.3"
 DelimitedFiles = "1"
 Distributions = "0.25.3"
 InvertedIndices = "1"
-LossFunctions = "0.11"
+LearnAPI = "0.1"
 MLJModelInterface = "1.7"
 Missings = "0.4, 1"
 OrderedCollections = "1.1"
 Parameters = "0.12"
 PrettyTables = "1, 2"
 ProgressMeter = "1.7.1"
+Reexport = "1.2"
 ScientificTypes = "3"
+StatisticalMeasures = "0.1.1"
+StatisticalMeasuresBase = "0.1.1"
 StatisticalTraits = "3.2"
 StatsBase = "0.32, 0.33, 0.34"
 Tables = "0.2, 1.0"
@@ -57,8 +69,9 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
 
 [targets]
-test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables"]
+test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "StatisticalMeasures", "Test", "TypedTables"]
diff --git a/README.md b/README.md
index 9323a9c2..8e9fc1e5 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,11 @@ repository provides core functionality for MLJ, including:
 
 - basic utilities for **manipulating datasets** and for **synthesizing datasets** (src/data)
   
-- a [small interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1) for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and `Holdout` (src/resampling.jl)
+- a [small
+  interface](https://alan-turing-institute.github.io/MLJ.jl/dev/evaluating_model_performance/#Custom-resampling-strategies-1)
+  for **resampling strategies** and implementations, including `CV()`, `StratifiedCV` and
+  `Holdout` (src/resampling.jl). The performance measures themselves (aka metrics), which
+  were previously provided by MLJBase.jl, now live in [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/).
 
 - methods for **performance evaluation**, based on those resampling strategies (src/resampling.jl)
 
@@ -44,9 +48,4 @@ repository provides core functionality for MLJ, including:
   associated methods, for use with
   [MLJTuning](https://github.com/JuliaAI/MLJTuning.jl) (src/hyperparam)
 
-- a [small
-  interface](https://alan-turing-institute.github.io/MLJ.jl/dev/performance_measures/#Traits-and-custom-measures-1)
-  for **performance measures** (losses and scores), implementation of about 60 such measures, including integration of the
-  [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl)
-  library (src/measures). To be migrated into separate package in the near future. 
 
diff --git a/ext/DefaultMeasuresExt.jl b/ext/DefaultMeasuresExt.jl
new file mode 100644
index 00000000..a06cd00f
--- /dev/null
+++ b/ext/DefaultMeasuresExt.jl
@@ -0,0 +1,15 @@
+module DefaultMeasuresExt
+# Adds `MLJBase.default_measure` methods whose return values are StatisticalMeasures measures.
+using MLJBase
+import MLJBase:default_measure, ProbabilisticDetector, DeterministicDetector
+using StatisticalMeasures
+using StatisticalMeasures.ScientificTypesBase
+# Each method dispatches on a model (1st argument) and a scientific type (2nd argument):
+default_measure(::Deterministic, ::Type{<:Union{Continuous,Count}}) = l2
+default_measure(::Deterministic, ::Type{<:Finite}) = misclassification_rate
+default_measure(::Probabilistic, ::Type{<:Union{Finite,Count}}) = log_loss
+default_measure(::Probabilistic, ::Type{<:Continuous}) = log_loss
+default_measure(::ProbabilisticDetector, ::Type{<:OrderedFactor{2}}) = area_under_curve
+default_measure(::DeterministicDetector, ::Type{<:OrderedFactor{2}}) = balanced_accuracy
+
+end # module
diff --git a/src/MLJBase.jl b/src/MLJBase.jl
index 63fe4fd7..f0a19e93 100644
--- a/src/MLJBase.jl
+++ b/src/MLJBase.jl
@@ -3,6 +3,7 @@ module MLJBase
 # ===================================================================
 # IMPORTS
 
+using Reexport
 import Base: ==, precision, getindex, setindex!
 import Base.+, Base.*, Base./
 
@@ -16,7 +17,7 @@ for trait in StatisticalTraits.TRAITS
     eval(:(import StatisticalTraits.$trait))
 end
 
-import Base.instances # considered a trait for measures
+import LearnAPI
 import StatisticalTraits.snakecase
 import StatisticalTraits.info
 
@@ -47,7 +48,7 @@ end
 ###################
 # Hack Block ends #
 ###################
-
+import MLJModelInterface: ProbabilisticDetector, DeterministicDetector
 import MLJModelInterface: fit, update, update_data, transform,
     inverse_transform, fitted_params, predict, predict_mode,
     predict_mean, predict_median, predict_joint,
@@ -78,8 +79,6 @@ using ProgressMeter
 import .Threads
 
 # Operations & extensions
-import LossFunctions
-import LossFunctions.Traits
 import StatsBase
 import StatsBase: fit!, mode, countmap
 import Missings: levels
@@ -89,6 +88,9 @@ using CategoricalDistributions
 import Distributions: pdf, logpdf, sampler
 const Dist = Distributions
 
+# Measures
+import StatisticalMeasuresBase
+
 # from Standard Library:
 using Statistics, LinearAlgebra, Random, InteractiveUtils
 
@@ -128,57 +130,6 @@ const CatArrMissing{T,N} = ArrMissing{CategoricalValue{T},N}
 const MMI = MLJModelInterface
 const FI  = MLJModelInterface.FullInterface
 
-const MARGIN_LOSSES = [
-    :DWDMarginLoss,
-    :ExpLoss,
-    :L1HingeLoss,
-    :L2HingeLoss,
-    :L2MarginLoss,
-    :LogitMarginLoss,
-    :ModifiedHuberLoss,
-    :PerceptronLoss,
-    :SigmoidLoss,
-    :SmoothedL1HingeLoss,
-    :ZeroOneLoss
-]
-
-const DISTANCE_LOSSES = [
-    :HuberLoss,
-    :L1EpsilonInsLoss,
-    :L2EpsilonInsLoss,
-    :LPDistLoss,
-    :LogitDistLoss,
-    :PeriodicLoss,
-    :QuantileLoss
-]
-
-const WITH_PARAMETERS = [
-    :DWDMarginLoss,
-    :SmoothedL1HingeLoss,
-    :HuberLoss,
-    :L1EpsilonInsLoss,
-    :L2EpsilonInsLoss,
-    :LPDistLoss,
-    :QuantileLoss,
-]
-
-const MEASURE_TYPE_ALIASES = [
-    :FPR, :FNR, :TPR, :TNR,
-    :FDR, :PPV, :NPV, :Recall, :Specificity,
-    :MFPR, :MFNR, :MTPR, :MTNR,
-    :MFDR, :MPPV, :MNPV, :MulticlassRecall, :MulticlassSpecificity,
-    :MCR,
-    :MCC,
-    :BAC, :BACC,
-    :RMS, :RMSPV, :RMSL, :RMSLP, :RMSP,
-    :MAV, :MAE, :MAPE,
-    :RSQ, :LogCosh,
-    :CrossEntropy,
-    :AUC
-]
-
-const LOSS_FUNCTIONS = vcat(MARGIN_LOSSES, DISTANCE_LOSSES)
-
 # ===================================================================
 # Computational Resource
 # default_resource allows to switch the mode of parallelization
@@ -199,19 +150,13 @@ include("models.jl")
 include("sources.jl")
 include("machines.jl")
 
-include("composition/deprecated_abstract_types.jl")
 include("composition/learning_networks/nodes.jl")
 include("composition/learning_networks/inspection.jl")
 include("composition/learning_networks/signatures.jl")
-include("composition/learning_networks/deprecated_machines.jl")
 include("composition/learning_networks/replace.jl")
 
-include("composition/models/deprecated_pipelines.jl")
-include("composition/models/deprecated_methods.jl")
 include("composition/models/network_composite_types.jl")
 include("composition/models/network_composite.jl")
-include("composition/models/deprecated_from_network.jl")
-include("composition/models/inspection.jl")
 include("composition/models/pipelines.jl")
 include("composition/models/transformed_target_model.jl")
 
@@ -225,21 +170,14 @@ include("data/data.jl")
 include("data/datasets.jl")
 include("data/datasets_synthetic.jl")
 
-include("measures/measures.jl")
-include("measures/measure_search.jl")
-include("measures/doc_strings.jl")
+include("default_measures.jl")
 
 include("composition/models/stacking.jl")
 
-# function on the right-hand side is defined in src/measures/meta_utilities.jl:
-const MEASURE_TYPES_ALIASES_AND_INSTANCES = measures_for_export()
-
 const EXTENDED_ABSTRACT_MODEL_TYPES = vcat(
     MLJBase.MLJModelInterface.ABSTRACT_MODEL_SUBTYPES,
     MLJBase.NETWORK_COMPOSITE_TYPES, # src/composition/models/network_composite_types.jl
-    MLJBase.COMPOSITE_TYPES, # src/composition/abstract_types.jl
-    MLJBase.SURROGATE_TYPES, # src/composition/abstract_types.jl
-    [:MLJType, :Model, :NetworkComposite, :Surrogate, :Composite],
+    [:MLJType, :Model, :NetworkComposite],
 )
 
 # ===================================================================
@@ -337,8 +275,8 @@ export machine, Machine, fit!, report, fit_only!, default_scitype_check_level,
 # datasets_synthetics.jl
 export make_blobs, make_moons, make_circles, make_regression
 
-# composition (surrogates and composites are exported in composition):
-export machines, sources, @from_network, @pipeline, Stack,
+# composition
+export machines, sources, Stack,
     glb, @tuple, node, @node, sources, origins, return!,
     nrows_at_source, machine, rebind!, nodes, freeze!, thaw!,
     Node, AbstractNode, Pipeline,
@@ -357,23 +295,8 @@ export ResamplingStrategy, Holdout, CV, StratifiedCV, TimeSeriesCV,
 # -------------------------------------------------------------------
 # exports from MLJBase specific to measures
 
-# measure names:
-for m in MEASURE_TYPES_ALIASES_AND_INSTANCES
-    :(export $m) |> eval
-end
-
-# measures/registry.jl:
-export measures, metadata_measure
-
 # measure/measures.jl (excluding traits):
-export aggregate, default_measure, value, skipinvalid
-
-# measures/probabilistic:
-export roc_curve, roc
-
-# measures/finite.jl (averaging modes for multiclass scores)
-export no_avg, macro_avg, micro_avg
-
+export default_measure
 
 # -------------------------------------------------------------------
 # re-export from Random, StatsBase, Statistics, Distributions,
@@ -381,4 +304,10 @@ export no_avg, macro_avg, micro_avg
 export pdf, sampler, mode, median, mean, shuffle!, categorical, shuffle,
    levels, levels!, std, Not, support, logpdf, LittleDict
 
+# julia < 1.9 has no package extensions; load the extension code directly instead:
+if !isdefined(Base, :get_extension)
+    include(joinpath("..","ext", "DefaultMeasuresExt.jl"))
+    @reexport using .DefaultMeasuresExt.StatisticalMeasures
+end
+
 end # module
diff --git a/src/composition/deprecated_abstract_types.jl b/src/composition/deprecated_abstract_types.jl
deleted file mode 100644
index e71ef88e..00000000
--- a/src/composition/deprecated_abstract_types.jl
+++ /dev/null
@@ -1,40 +0,0 @@
-##  COMPOSITE AND SURRUGOTE MODEL TYPES
-
-# For example, we want to define
-
-# abstract type ProbabilisticComposite <: Probabilistic end
-# struct ProbabilisticSurrogate <: Probabilistic end
-# Probabilistic() = ProbablisiticSurrogate()
-
-# but also want this for all the abstract `Model` subtypes:
-
-const COMPOSITE_TYPES = Symbol[]
-const SURROGATE_TYPES = Symbol[]
-const composite_types = Any[]
-const surrogate_types = Any[]
-
-for T in MLJModelInterface.ABSTRACT_MODEL_SUBTYPES
-    composite_type_name = string(T, "Composite") |> Symbol
-    surrogate_type_name = string(T, "Surrogate") |> Symbol
-
-    @eval(abstract type $composite_type_name <: $T end)
-    @eval(struct $surrogate_type_name <: $T end)
-
-    push!(COMPOSITE_TYPES, composite_type_name)
-    push!(SURROGATE_TYPES, surrogate_type_name)
-    push!(composite_types, @eval($composite_type_name))
-    push!(surrogate_types, @eval($surrogate_type_name))
-
-    # shorthand surrogate constructor:
-    @eval($T() = $surrogate_type_name())
-end
-
-
-const Surrogate = Union{surrogate_types...}
-const Composite = Union{composite_types...}
-
-MLJModelInterface.is_wrapper(::Type{<:Union{Composite,Surrogate}}) = true
-MLJModelInterface.package_name(::Type{<:Union{Composite,Surrogate}}) = "MLJBase"
-for T in surrogate_types
-    MLJModelInterface.load_path(::Type{T}) = string("MLJBase.", T)
-end
diff --git a/src/composition/learning_networks/deprecated_machines.jl b/src/composition/learning_networks/deprecated_machines.jl
deleted file mode 100644
index 9080d196..00000000
--- a/src/composition/learning_networks/deprecated_machines.jl
+++ /dev/null
@@ -1,440 +0,0 @@
-# # SIGNATURES
-
-function _operation_part(signature)
-    ops = filter(in(OPERATIONS), keys(signature))
-    return NamedTuple{ops}(map(op->getproperty(signature, op), ops))
-end
-function _report_part(signature)
-    :report in keys(signature) || return NamedTuple()
-    return signature.report
-end
-
-_operations(signature) = keys(_operation_part(signature))
-
-function _nodes(signature)
-    return (values(_operation_part(signature))...,
-            values(_report_part(signature))...)
-end
-
-function _call(nt::NamedTuple)
-    _call(n) = deepcopy(n())
-    _keys = keys(nt)
-    _values = values(nt)
-    return NamedTuple{_keys}(_call.(_values))
-end
-
-"""
-    model_supertype(interface)
-
-Return, if this can be inferred, which of `Deterministic`,
-`Probabilistic` and `Unsupervised` is the appropriate supertype for a
-composite model obtained by exporting a learning network with the
-specified learning network interface.
-
-$DOC_NETWORK_INTERFACES
-
-If a supertype cannot be inferred, `nothing` is returned.
-
-If the network with given `signature` is not exportable, this method
-will not error but it will not a give meaningful return value either.
-
-**Private method.**
-
-"""
-function model_supertype(signature)
-
-    operations = _operations(signature)
-
-    length(intersect(operations, (:predict_mean, :predict_median))) == 1 &&
-        return Deterministic
-
-    if :predict in operations
-        node = signature.predict
-        if node isa Source
-            return Deterministic
-        end
-        if node.machine !== nothing
-            model = node.machine.model
-            model isa Deterministic && return Deterministic
-            model isa Probabilistic && return Probabilistic
-        end
-    end
-
-    return nothing
-
-end
-
-
-# # FITRESULTS FOR COMPOSITE MODELS
-
-mutable struct CompositeFitresult
-    signature
-    glb
-    network_model_names
-    function CompositeFitresult(signature)
-        signature_node = glb(_nodes(signature)...)
-        new(signature, signature_node)
-    end
-end
-signature(c::CompositeFitresult) = getfield(c, :signature)
-glb(c::CompositeFitresult) = getfield(c, :glb)
-
-# To accommodate pre-existing design (operations.jl) arrange
-# that `fitresult.predict` returns the predict node, etc:
-Base.propertynames(c::CompositeFitresult) = keys(signature(c))
-Base.getproperty(c::CompositeFitresult, name::Symbol) =
-    getproperty(signature(c), name)
-
-
-# # LEARNING NETWORK MACHINES
-
-surrogate(::Type{<:Deterministic})  = Deterministic()
-surrogate(::Type{<:Probabilistic})  = Probabilistic()
-surrogate(::Type{<:Unsupervised}) = Unsupervised()
-surrogate(::Type{<:Static}) = Static()
-
-caches_data_by_default(::Type{<:Surrogate}) = false
-
-const ERR_MUST_PREDICT = ArgumentError(
-    "You must specify at least `predict=<some node>`. ")
-const ERR_MUST_TRANSFORM = ArgumentError(
-    "You must specify at least `transform=<some node>`. ")
-const ERR_MUST_OPERATE = ArgumentError(
-    "You must specify at least one operation, as in `predict=<some node>`. ")
-const ERR_MUST_SPECIFY_SOURCES = ArgumentError(
-    "You must specify at least one source `Xs`, as in "*
-    "`machine(surrogate_model, Xs, ...; kwargs...)`. ")
-const ERR_BAD_SIGNATURE = ArgumentError(
-    "Only the following keyword arguments are supported in learning network "*
-    "machine constructors: `report` or one of: `$OPERATIONS`. ")
-const ERR_EXPECTED_NODE_IN_SIGNATURE = ArgumentError(
-    "Learning network machine constructor syntax error. "*
-    "Did not enounter `Node` in place one was expected. ")
-
-function check_surrogate_machine(::Surrogate, signature, _sources)
-    isempty(_operations(signature)) && throw(ERR_MUST_OPERATE)
-    isempty(_sources) && throw(ERR_MUST_SPECIFY_SOURCES)
-    return nothing
-end
-
-function check_surrogate_machine(::Union{Supervised,SupervisedAnnotator},
-                                 signature,
-                                 _sources)
-    isempty(_operations(signature)) && throw(ERR_MUST_PREDICT)
-    length(_sources) > 1 || throw(err_supervised_nargs())
-    return nothing
-end
-
-function check_surrogate_machine(::Union{Unsupervised},
-                                 signature,
-                                 _sources)
-    isempty(_operations(signature)) && throw(ERR_MUST_TRANSFORM)
-    length(_sources) < 2 || throw(err_unsupervised_nargs())
-    return nothing
-end
-
-const WARN_NETWORK_MACHINES_DEPRECATION =
-    "Learning network machines are deprecated. For the recommended way of exporting "*
-    "learning networks as new stand-alone model types, see the \"Learning Networks\" "*
-    "section of the MLJ manual. "
-
-function machine(model::Surrogate, _sources::Source...; depwarn=true, pair_itr...)
-
-    depwarn && Base.depwarn(WARN_NETWORK_MACHINES_DEPRECATION, :machine, force=true)
-
-    # named tuple, such as `(predict=yhat, transform=W)`:
-    signature = (; pair_itr...)
-
-    # signature checks:
-    isempty(_operations(signature)) && throw(ERR_MUST_OPERATE)
-    for k in keys(signature)
-        if k in OPERATIONS
-            getproperty(signature, k) isa AbstractNode ||
-                throw(ERR_EXPECTED_NODE_IN_SIGNATURE)
-        elseif k === :report
-            all(v->v isa AbstractNode, values(signature.report)) ||
-                throw(ERR_EXPECTED_NODE_IN_SIGNATURE)
-        else
-            throw(ERR_BAD_SIGNATURE)
-        end
-    end
-
-    check_surrogate_machine(model, signature, _sources)
-
-    mach = Machine(model, _sources...)
-
-    mach.fitresult = CompositeFitresult(signature)
-
-    return mach
-
-end
-
-function machine(_sources::Source...; depwarn=true, pair_itr...)
-
-    signature = (; pair_itr...)
-
-    T = model_supertype(signature)
-    if T == nothing
-        @warn "Unable to infer surrogate model type. \n"*
-            "Using Deterministic(). To override specify "*
-            "surrogate model, as in "*
-        "`machine(Probabilistic(), ...)` or `machine(Interval(), ...)`"
-        model = Deterministic()
-    else
-        model = surrogate(T)
-    end
-
-    return machine(model, _sources...; depwarn, pair_itr...)
-
-end
-
-"""
-    N = glb(mach::Machine{<:Union{Composite,Surrogate}})
-
-A greatest lower bound for the nodes appearing in the learning network interface of
-`mach`.
-
-$DOC_NETWORK_INTERFACES
-
-**Private method.**
-
-"""
-glb(mach::Machine{<:Union{Composite,Surrogate}}) = glb(mach.fitresult)
-
-"""
-    report(fitresult::CompositeFitresult)
-
-Return a tuple combining the report from `fitresult.glb` (a `Node` report) with the
-additions coming from nodes declared as report nodes in `fitresult.signature`, but without
-merging the two.
-
-$DOC_NETWORK_INTERFACES
-
-**Private method**
-"""
-function report(fitresult::CompositeFitresult)
-    basic = report(glb(fitresult))
-    additions = _call(_report_part(signature(fitresult)))
-    return (; basic, additions)
-end
-
-"""
-    fit!(mach::Machine{<:Surrogate};
-         rows=nothing,
-         acceleration=CPU1(),
-         verbosity=1,
-         force=false))
-
-Train the complete learning network wrapped by the machine `mach`.
-
-More precisely, if `s` is the learning network signature used to
-construct `mach`, then call `fit!(N)`, where `N` is a greatest lower
-bound of the nodes appearing in the signature (values in the signature
-that are not `AbstractNode` are ignored). For example, if `s =
-(predict=yhat, transform=W)`, then call `fit!(glb(yhat, W))`.
-
-See also [`machine`](@ref)
-
-"""
-function fit!(mach::Machine{<:Surrogate}; kwargs...)
-    glb = MLJBase.glb(mach)
-    fit!(glb; kwargs...)
-    mach.state += 1
-    mach.report = Dict{Symbol,Any}(:fit => MLJBase.report(mach.fitresult))
-    mach.old_model = deepcopy(mach.model)
-    return mach
-end
-
-MLJModelInterface.fitted_params(mach::Machine{<:Surrogate}) =
-    fitted_params(glb(mach))
-
-
-# # CONSTRUCTING THE RETURN VALUE FOR A COMPOSITE FIT METHOD
-
-logerr_identical_models(name, model) =
-    "The hyperparameters $name of "*
-    "$model have identical model "*
-    "instances as values. "
-const ERR_IDENTICAL_MODELS = ArgumentError(
-    "Two distinct hyper-parameters of a "*
-    "composite model that are both "*
-    "associated with models in the underlying learning "*
-    "network (eg, any two components of a `@pipeline` model) "*
-    "cannot have identical values, although they can be `==` "*
-    "(corresponding nested properties are `==`). "*
-    "Consider constructing instances "*
-    "separately or use `deepcopy`. ")
-
-# Identify which properties of `model` have, as values, a model in the
-# learning network wrapped by `mach`, and check that no two such
-# properties have have identical values (#377). Return the property name
-# associated with each model in the network (in the order appearing in
-# `models(glb(mach))`) using `nothing` when the model is not
-# associated with any property.
-network_model_names(model::Nothing, mach::Machine{<:Surrogate}) = nothing
-
-function network_model_names(model::M, mach::Machine{<:Surrogate}) where M<:Model
-
-    network_model_ids = objectid.(MLJBase.models(glb(mach)))
-
-    names = propertynames(model)
-
-    # intialize dict to detect duplicity a la #377:
-    name_given_id = Dict{UInt64,Vector{Symbol}}()
-
-    # identify location of properties whose values are models in the
-    # learning network, and build name_given_id:
-    for name in names
-        id = objectid(getproperty(model, name))
-        if id in network_model_ids
-            if haskey(name_given_id, id)
-                push!(name_given_id[id], name)
-            else
-                name_given_id[id] = [name,]
-            end
-        end
-    end
-
-    # perform #377 check:
-    no_duplicates = all(values(name_given_id)) do name
-        length(name) == 1
-    end
-    if !no_duplicates
-        for (id, name) in name_given_id
-            if length(name) > 1
-                @error logerr_identical_models(name, model)
-            end
-        end
-        throw(ERR_IDENTICAL_MODELS)
-    end
-
-    return map(network_model_ids) do id
-        if id in keys(name_given_id)
-            return name_given_id[id] |> first
-        else
-            return nothing
-        end
-    end
-
-end
-
-const WARN_RETURN_DEPWARN =
-    "The use of `return!` is deprecated. For the recommended way of exporting "*
-    "learning networks as new stand-alone model types, see the \"Learning Networks\" "*
-    "section of the MLJ manual. "
-
-"""
-
-    return!(mach::Machine{<:Surrogate}, model, verbosity; acceleration=CPU1())
-
-The last call in custom code defining the `MLJBase.fit` method for a
-new composite model type. Here `model` is the instance of the new type
-appearing in the `MLJBase.fit` signature, while `mach` is a learning
-network machine constructed using `model`. Not relevant when defining
-composite models using `@pipeline` (deprecated) or `@from_network`.
-
-For usage, see the example given below. Specifically, the call does
-the following:
-
-- Determines which hyper-parameters of `model` point to model
-  instances in the learning network wrapped by `mach`, for recording
-  in an object called `cache`, for passing onto the MLJ logic that
-  handles smart updating (namely, an `MLJBase.update` fallback for
-  composite models).
-
-- Calls `fit!(mach, verbosity=verbosity, acceleration=acceleration)`.
-
-- Records (among other things) a copy of `model` in a variable called `cache`
-
-- Returns `cache` and outcomes of training in an appropriate form
-  (specifically, `(mach.fitresult, cache, mach.report)`; see [Adding
-  Models for General
-  Use](https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/)
-  for technical details.)
-
-
-### Example
-
-The following code defines, "by hand", a new model type `MyComposite`
-for composing standardization (whitening) with a deterministic
-regressor:
-
-```
-mutable struct MyComposite <: DeterministicComposite
-    regressor
-end
-
-function MLJBase.fit(model::MyComposite, verbosity, X, y)
-    Xs = source(X)
-    ys = source(y)
-
-    mach1 = machine(Standardizer(), Xs)
-    Xwhite = transform(mach1, Xs)
-
-    mach2 = machine(model.regressor, Xwhite, ys)
-    yhat = predict(mach2, Xwhite)
-
-    mach = machine(Deterministic(), Xs, ys; predict=yhat)
-    return!(mach, model, verbosity)
-end
-```
-
-"""
-function return!(mach::Machine{<:Surrogate},
-                 model::Union{Model,Nothing},
-                 verbosity;
-                 acceleration=CPU1(), depwarn=true)
-
-    depwarn && Base.depwarn(WARN_RETURN_DEPWARN, :return!, force=true)
-
-    network_model_names_ = network_model_names(model, mach)
-
-    verbosity isa Nothing || fit!(mach, verbosity=verbosity, acceleration=acceleration)
-    setfield!(mach.fitresult, :network_model_names, network_model_names_)
-
-    # record the current hyper-parameter values:
-    old_model = deepcopy(model)
-
-    glb = MLJBase.glb(mach)
-    cache = (; old_model)
-
-    return mach.fitresult, cache, report_given_method(mach)[:fit]
-end
-
-
-
-###############################################################################
-#####              SAVE AND RESTORE FOR COMPOSITES                        #####
-###############################################################################
-
-
-# Returns a new `CompositeFitresult` that is a shallow copy of the original one.
-function save(model::Composite, fitresult)
-    interface = MLJBase.signature(fitresult)
-    newsignature = replace(Signature(interface), serializable=true) |> unwrap
-    newfitresult = MLJBase.CompositeFitresult(newsignature)
-    setfield!(
-        newfitresult,
-        :network_model_names,
-        getfield(fitresult, :network_model_names)
-    )
-    return newfitresult
-end
-
-
-# Restores a machine of a composite model by restoring all
-# submachines contained in it.
-function restore!(mach::Machine{<:Composite})
-    glb_node = glb(mach)
-    for submach in machines(glb_node)
-        restore!(submach)
-    end
-    mach.state = 1
-    return mach
-end
-
-function report_for_serialization(mach::Machine{<:Composite})
-    basic = report(glb(mach.fitresult))
-    additions = report_given_method(mach)[:fit].additions
-    return Dict{Symbol,Any}(:fit => (; basic, additions))
-end
diff --git a/src/composition/learning_networks/replace.jl b/src/composition/learning_networks/replace.jl
index 3c0cc248..175c4b91 100644
--- a/src/composition/learning_networks/replace.jl
+++ b/src/composition/learning_networks/replace.jl
@@ -177,29 +177,6 @@ function Base.replace(signature::Signature, pairs::Pair...; node_dict=false, kwa
     return newsignature, newnode_given_old
 end
 
-"""
-    replace(mach, a1=>b1, a2=>b2, ...; options...)
-
-Return a copy the learning network machine `mach`, and it's underlying learning network,
-but replacing any specified sources and models `a1, a2, ...` of the original underlying
-network with `b1, b2, ...`.
-
-$DOC_REPLACE_OPTIONS
-
-"""
-function Base.replace(mach::Machine{<:Surrogate}, pairs::Pair...; kwargs...)
-    signature = MLJBase.signature(mach.fitresult) |> Signature
-
-    newsignature, newnode_given_old =
-        replace(signature, pairs...; node_dict=true, kwargs...)
-
-    newinterface = unwrap(newsignature)
-
-    newargs = [newnode_given_old[arg] for arg in mach.args]
-
-    return machine(mach.model, newargs...; newinterface...)
-end
-
 # Copy the complete learning network having `W` as a greatest lower bound, executing the
 # specified replacements, and return the dictionary mapping old nodes to new nodes.
 function _replace(
diff --git a/src/composition/learning_networks/signatures.jl b/src/composition/learning_networks/signatures.jl
index b224cd47..d49aace9 100644
--- a/src/composition/learning_networks/signatures.jl
+++ b/src/composition/learning_networks/signatures.jl
@@ -356,7 +356,7 @@ function fitted_params(signature::Signature; supplement=true)
 end
 
 """
-    output_and_report(signature, operation, Xnew)
+    output_and_report(signature, operation, Xnew...)
 
 **Private method.**
 
@@ -375,3 +375,6 @@ function output_and_report(signature, operation, Xnew)
     report = MLJBase.report(signature_clone; supplement=false)
     return output, report
 end
+# special case for static transformers with multiple inputs (forwarded as a single tuple):
+output_and_report(signature, operation, Xnew...) =
+    output_and_report(signature, operation, Xnew)
diff --git a/src/composition/models/deprecated_from_network.jl b/src/composition/models/deprecated_from_network.jl
deleted file mode 100644
index dfe27807..00000000
--- a/src/composition/models/deprecated_from_network.jl
+++ /dev/null
@@ -1,272 +0,0 @@
-## EXPORTING LEARNING NETWORKS AS MODELS WITH @from_network
-
-# closure to generate the fit methods for exported composite. Here
-# `mach` is a learning network machine.
-function fit_method(mach, models...)
-
-    signature = mach.fitresult
-    mach_args = mach.args
-
-    function _fit(model, verbosity::Integer, args...)
-        length(args) > length(mach_args) &&
-            throw(ArgumentError("$M does not support more than "*
-                                "$(length(mach_args)) training arguments"))
-        replacement_models = [getproperty(model, fld)
-                              for fld in propertynames(model)]
-        model_replacements = [models[j] => replacement_models[j]
-                              for j in eachindex(models)]
-        source_replacements = [mach_args[i] => source(args[i])
-                               for i in eachindex(args)]
-        replacements = vcat(model_replacements, source_replacements)
-
-        new_mach =
-            replace(mach, replacements...; empty_unspecified_sources=true)
-
-        return!(new_mach, model, verbosity; depwarn=false)
-    end
-
-    return _fit
-end
-
-net_error(message) = throw(ArgumentError("Learning network export error.\n"*
-                                     string(message)))
-net_error(k::Int) = throw(ArgumentError("Learning network export error $k. "))
-
-_insert_subtyping(ex, subtype_ex) =
-    Expr(:(<:), ex, subtype_ex)
-
-# create the exported type symbol, e.g. abstract_type(T) == Unsupervised
-# would result in :UnsupervisedComposite
-_exported_type(T::Model) = Symbol(nameof(abstract_type(T)), :Composite)
-
-function eval_and_reassign(modl, ex)
-    s = gensym()
-    evaluated = modl.eval(ex)
-    if evaluated isa Symbol
-        hack = String(evaluated)
-        modl.eval(:($s = Symbol($hack)))
-    else
-        modl.eval(:($s = $evaluated))
-    end
-    return s, evaluated
-end
-
-function without_line_numbers(block_ex)
-    block_ex.head == :block || throw(ArgumentError)
-    args = filter(block_ex.args) do arg
-        !(arg isa LineNumberNode)
-    end
-    return Expr(:block, args...)
-end
-
-function from_network_preprocess(modl, mach_ex, block_ex)
-
-    mach_ex, mach  = eval_and_reassign(modl, mach_ex)
-    mach isa Machine{<:Surrogate} ||
-        net_error("$mach is not a learning network machine. ")
-    if block_ex.head == :block
-        block_ex = without_line_numbers(block_ex)
-        struct_ex = block_ex.args[1]
-        trait_declaration_exs = block_ex.args[2:end]
-    elseif block_ex.head == :struct
-        struct_ex = block_ex
-        trait_declaration_exs = []
-    else
-        net_error("Expected `struct`, `mutable struct` or "*
-                  "`begin ... end` block, but got `$block_ex` ")
-    end
-
-    # if necessary add or modify struct subtyping:
-    if struct_ex.args[2] isa Symbol
-        struct_ex.args[2] = _insert_subtyping(struct_ex.args[2],
-                                              _exported_type(mach.model))
-        modeltype_ex = struct_ex.args[2].args[1]
-    elseif struct_ex.args[2] isa Expr
-        struct_ex.args[2].head == :(<:) ||
-                    net_error("Badly formed `struct` subtying. ")
-        modeltype_ex = struct_ex.args[2].args[1]
-        super = eval(struct_ex.args[2].args[2])
-        inferred_super_ex = _exported_type(mach.model)
-        if !(super <: Composite)
-            @warn "New composite type must subtype `Composite` but "*
-            "`$super` does not. Instead declaring "*
-            "`$modeltype_ex <: $inferred_super_ex`. "
-            struct_ex.args[2].args[2] = inferred_super_ex
-        end
-    else
-        net_error(41)
-    end
-
-    # test if there are no fields:
-    field_exs = without_line_numbers(struct_ex.args[3]).args
-    no_fields = isempty(field_exs)
-
-    # extract trait definitions:
-    trait_ex_given_name_ex = Dict{Symbol,Any}()
-
-    ne() = net_error("Bad trait declaration. ")
-    for ex in trait_declaration_exs
-        ex isa Expr           || ne()
-        ex.head == :(=)       || ne()
-        ex.args[1] isa Symbol || ne()
-        ex.args[1] in MLJModelInterface.MODEL_TRAITS ||
-            net_error("Expected a model trait as keywork but "*
-                      "got $(ex.args[2]). Options are:\n"*
-                      "$MLJModelInterface.MODEL_TRAIES. ")
-        length(ex.args) == 2  || ne()
-        trait_ex_given_name_ex[ex.args[1]] = ex.args[2]
-    end
-
-    return mach_ex, modeltype_ex, struct_ex, no_fields, trait_ex_given_name_ex
-
-end
-
-function from_network_(modl,
-                       mach_ex,
-                       modeltype_ex,
-                       struct_ex,
-                       no_fields,
-                       trait_ex_given_name_ex)
-
-    args = gensym(:args)
-    models = gensym(:models)
-    instance = gensym(:instance)
-
-    # Define the new model type with keyword constructor:
-    if no_fields
-        modl.eval(struct_ex)
-    else
-        modl.eval(MLJBase.Parameters.with_kw(struct_ex, modl, false))
-    end
-
-    # Test that an instance can be created:
-    try
-        modl.eval(:($modeltype_ex()))
-    catch e
-        @error "Problem instantiating a default instance of the "*
-        "new composite type. Each field name in the struct expression "*
-        "must have a corresponding model instance (that also appears "*
-        "somewhere in the network). "*
-        "Perhaps you forgot to specify one of these?"
-        throw(e)
-    end
-
-    # code defining fit method:
-    program1 = quote
-
-        $(isdefined(modl, :MLJ) ? :(import MLJ.MLJBase) : :(import MLJBase))
-        $(isdefined(modl, :MLJ) ? :(import MLJ.MLJBase.MLJModelInterface) :
-          :(import MLJBase.MLJModelInterface))
-
-        $instance = $modeltype_ex()
-        $models = [getproperty($instance, name)
-                   for name in fieldnames($modeltype_ex)]
-
-        MLJModelInterface.fit(model::$modeltype_ex, verb::Integer, $args...) =
-            MLJBase.fit_method($mach_ex, $models...)(model, verb, $args...)
-
-    end
-
-    modl.eval(program1)
-
-    # define composite model traits:
-    for (name_ex, value_ex) in trait_ex_given_name_ex
-        program = quote
-            MLJBase.$name_ex(::Type{<:$modeltype_ex}) = $value_ex
-        end
-        modl.eval(program)
-    end
-
-    return nothing
-
-end
-
-const WARN_FROM_NETWORK_DEPRECATION =
-    "The `@from_network` macro is deprecated. See the \"Learning Networks\" section "*
-    "of the MLJ manual for recommended way to export learning networks as new "*
-    "composite model types. "
-
-"""
-
-    @from_network mach [mutable] struct NewCompositeModel
-           ...
-    end
-
-or
-
-    @from_network mach begin
-        [mutable] struct NewCompositeModel
-           ...
-        end
-        <optional trait declarations>
-    end
-
-Create a new stand-alone model type called `NewCompositeModel`, using
-the specified learning network machine `mach` as a blueprint.
-
-For more on learning network machines, see [`machine`](@ref).
-
-
-### Example
-
-Consider the following simple learning network for training a decision
-tree after one-hot encoding the inputs, and forcing the predictions to
-be point-predictions (rather than probabilistic):
-
-```julia
-Xs = source()
-ys = source()
-
-hot = OneHotEncoder()
-tree = DecisionTreeClassifier()
-
-W = transform(machine(hot, Xs), Xs)
-yhat = predict_mode(machine(tree, W, ys), W)
-```
-
-A learning network machine is defined by
-
-```julia
-mach = machine(Deterministic(), Xs, ys; predict=yhat)
-```
-
-To specify a new `Deterministic` composite model type `WrappedTree` we
-specify the model instances appearing in the network as "default"
-values in the following decorated struct definition:
-
-```julia
-@from_network mach struct WrappedTree
-    encoder=hot
-    decision_tree=tree
-end
-```
-and create a new instance with `WrappedTree()`.
-
-To allow the second model component to be replaced by any other
-probabilistic model we instead make a mutable struct declaration and,
-if desired, annotate types appropriately.  In the following code
-illustration some model trait declarations have also been added:
-
-```julia
-@from_network mach begin
-    mutable struct WrappedTree
-        encoder::OneHotEncoder=hot
-        classifier::Probabilistic=tree
-    end
-    input_scitype = Table(Continuous, Finite)
-    is_pure_julia = true
-end
-```
-
-"""
-macro nodepwarn_from_network(exs...)
-    args = from_network_preprocess(__module__, exs...)
-    modeltype_ex = args[2]
-    from_network_(__module__, args...)
-end
-macro from_network(exs...)
-    Base.depwarn(WARN_FROM_NETWORK_DEPRECATION, :from_network, force=true)
-    args = from_network_preprocess(__module__, exs...)
-    modeltype_ex = args[2]
-    from_network_(__module__, args...)
-end
diff --git a/src/composition/models/deprecated_methods.jl b/src/composition/models/deprecated_methods.jl
deleted file mode 100644
index 38b48e69..00000000
--- a/src/composition/models/deprecated_methods.jl
+++ /dev/null
@@ -1,74 +0,0 @@
-## FALL-BACK METHODS FOR COMPOSITE MODELS (EXPORTED LEARNING NETWORKS)
-
-# *Note.* Be sure to read Note 4 in src/operations.jl to see see how
-# fallbacks are provided for operations acting on Composite models.
-
-caches_data_by_default(::Type{<:Composite}) = true
-
-# builds on `fitted_params(::CompositeFitresult)` defined in
-# composition/learning_networks/machines.jl:
-fitted_params(::Union{Composite,Surrogate}, fitresult::CompositeFitresult) =
-    fitted_params(glb(fitresult))
-
-function update(model::M,
-                verbosity::Integer,
-                fitresult::CompositeFitresult,
-                cache,
-                args...) where M <: Composite
-
-    # This method falls back to `fit` to force rebuilding of the
-    # underlying learning network if, since the last fit:
-    #
-    # (i) Any hyper-parameter of `model` that has, as a value, a model in the network, has
-    #     been replaced with a new value (and not merely mutated), OR
-
-    # (ii) Any OTHER hyper-parameter has changed it's value (in the sense
-    # of `==`).
-
-    # Otherwise, a "smart" fit is carried out by calling `fit!` on a
-    # greatest lower bound node for nodes in the signature of the
-    # underlying learning network machine.
-
-    network_model_names = getfield(fitresult, :network_model_names)
-
-    old_model = cache.old_model
-    glb = MLJBase.glb(fitresult) # greatest lower bound of network, a node
-
-    if fallback(model, old_model, network_model_names, glb)
-        return fit(model, verbosity, args...)
-    end
-
-    fit!(glb; verbosity=verbosity)
-
-    # Retrieve additional report values
-    report = MLJBase.report(fitresult)
-
-    # record current model state:
-    cache = (; old_model = deepcopy(model))
-
-    return (fitresult,
-            cache,
-            report)
-
-end
-
-# helper for preceding method (where logic is explained):
-function fallback(model::M, old_model, network_model_names, glb_node) where M
-    # check the hyper-parameters corresponding to models:
-    network_models = MLJBase.models(glb_node)
-    for j in eachindex(network_models)
-        name = network_model_names[j]
-        name === nothing ||
-            objectid(network_models[j])===objectid(getproperty(model, name)) ||
-            return true
-    end
-    # check any other hyper-parameter:
-    for name in propertynames(model)
-        if !(name in network_model_names)
-            old_value = getproperty(old_model, name)
-            value = getproperty(model, name)
-            value == old_value || return true
-        end
-    end
-    return false
-end
diff --git a/src/composition/models/deprecated_pipelines.jl b/src/composition/models/deprecated_pipelines.jl
deleted file mode 100644
index fbebd897..00000000
--- a/src/composition/models/deprecated_pipelines.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-const ERR_PIPELINE = ErrorException(
-    "The `@pipeline` macro is deprecated. For pipelines without "*
-    "target transformations use pipe syntax, as in "*
-    "`ContinuousEncoder() |> Standardizer() |> my_classifier`. "*
-    "For details and advanced optioins, query the `Pipeline` docstring. "*
-    "To wrap a supervised model in a target transformation, use "*
-    "`TransformedTargetModel`, as in "*
-    "`TransformedTargetModel(my_regressor, target=Standardizer())`"
-)
-
-macro pipeline(ex...) throw(ERR_PIPELINE) end
diff --git a/src/composition/models/inspection.jl b/src/composition/models/inspection.jl
deleted file mode 100644
index ece70326..00000000
--- a/src/composition/models/inspection.jl
+++ /dev/null
@@ -1,44 +0,0 @@
-## USER FRIENDLY INSPECTION OF COMPOSITE MACHINES
-
-try_scalarize(v) = length(v) == 1 ? v[1] : v
-
-function machines_given_model_name(mach::Machine{M}) where  M<:Composite
-    network_model_names = getfield(mach.fitresult, :network_model_names)
-    names = unique(filter(name->!(name === nothing), network_model_names))
-    glb = MLJBase.glb(mach)
-    network_models = MLJBase.models(glb)
-    network_machines = MLJBase.machines(glb)
-    ret = LittleDict{Symbol,Any}()
-    for name in names
-        mask = map(==(name), network_model_names)
-        _models = network_models[mask]
-        _machines = filter(mach->mach.model in _models, network_machines)
-        ret[name] = _machines
-    end
-    return ret
-end
-
-function tuple_keyed_on_model_names(machines, mach, f)
-    dict = MLJBase.machines_given_model_name(mach)
-    names = tuple(keys(dict)...)
-    named_tuple_values = map(names) do name
-        [f(m) for m in dict[name]] |> try_scalarize
-    end
-    return NamedTuple{names}(named_tuple_values)
-end
-
-function report(mach::Machine{<:Union{Composite,Surrogate}})
-    report_additions = MLJBase.report_given_method(mach)[:fit].additions
-    report_basic = MLJBase.report_given_method(mach)[:fit].basic
-    report_components = mach isa Machine{<:Surrogate} ? NamedTuple() :
-        MLJBase.tuple_keyed_on_model_names(report_basic.machines, mach, MLJBase.report)
-    return merge(report_components, report_basic, report_additions)
-end
-
-function fitted_params(mach::Machine{<:Composite})
-    fp_basic = fitted_params(mach.model, mach.fitresult)
-    machines = fp_basic.machines
-    fp_components =
-        MLJBase.tuple_keyed_on_model_names(machines, mach, MLJBase.fitted_params)
-    return merge(fp_components, fp_basic)
-end
diff --git a/src/composition/models/pipelines.jl b/src/composition/models/pipelines.jl
index 58da17c5..88e7c592 100644
--- a/src/composition/models/pipelines.jl
+++ b/src/composition/models/pipelines.jl
@@ -138,13 +138,7 @@ const ERR_MIXED_PIPELINE_SPEC = ArgumentError(
     "Either specify all pipeline components without names, as in "*
     "`Pipeline(model1, model2)` or specify names for all "*
     "components, as in `Pipeline(myfirstmodel=model1, mysecondmodel=model2)`. ")
-const ERR_USING_TARGET_KWARG = ArgumentError(
-    "You are not permitted to name a pipeline component \"target\", "*
-    "as this may be confused with the `target` keyword argument for "*
-    "the older `@pipeline` macro. `Pipeline` does not support target "*
-    "transformations. To implement one, wrap a supervised "*
-    "`model` using `TransformedTargetModel`, as in "*
-    "`TransformedTargetModel(model, transformer=Standardizer())`. ")
+
 
 # The following combines its arguments into a named tuple, performing
 # a number of checks and modifications. Specifically, it checks
@@ -277,7 +271,6 @@ function Pipeline(args...; prediction_type=nothing,
     # construct the named tuple of components:
     if isempty(args)
         _names = keys(kwargs)
-        :target in _names && throw(ERR_USING_TARGET_KWARG)
         _components = values(values(kwargs))
     else
         _names = Symbol[]
@@ -586,13 +579,6 @@ function supervised_component(pipe::SupervisedPipeline)
     return getproperty(named_components, name)
 end
 
-model_type(::Machine{M}) where M = M
-function supervised(machines)
-    model_types = model_type.(machines)
-    idx = findfirst(M -> M <: Supervised, model_types)
-    return machines[idx]
-end
-
 
 # ## Traits
 
diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index 4a760e24..ec872c16 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -378,14 +378,23 @@ model_2, ...), ...)
 function internal_stack_report(
     stack::Stack{modelnames,},
     verbosity::Int,
-    tt_pairs,
+    tt_pairs, # train_test_pairs
     folds_evaluations...
 ) where modelnames
 
     n_measures = length(stack.measures)
     nfolds = length(tt_pairs)
 
-    # For each model we record the results mimicking the fields PerformanceEvaluation
+    test_fold_sizes = map(tt_pairs) do train_test_pair
+        test = last(train_test_pair)
+        length(test)
+    end
+
+    # weights to be used to aggregate per-fold measurements (averaging to 1):
+    fold_weights(mode) = nfolds .* test_fold_sizes ./ sum(test_fold_sizes)
+    fold_weights(::StatisticalMeasuresBase.Sum) = nothing
+
+    # For each model we record the results mimicking the fields of PerformanceEvaluation
     results = NamedTuple{modelnames}(
         [(
             model = model,
@@ -393,7 +402,7 @@ function internal_stack_report(
             measurement = Vector{Any}(undef, n_measures),
             operation = _actual_operations(nothing, stack.measures, model, verbosity),
             per_fold = [Vector{Any}(undef, nfolds) for _ in 1:n_measures],
-            per_observation = Vector{Union{Missing, Vector{Any}}}(missing, n_measures),
+            per_observation = [Vector{Vector{Any}}(undef, nfolds) for _ in 1:n_measures],
             fitted_params_per_fold = [],
             report_per_fold = [],
             train_test_pairs = tt_pairs,
@@ -419,30 +428,29 @@ function internal_stack_report(
                 model_results.operation,
             ))
                 ypred = operation(mach, Xtest)
-                loss = measure(ypred, ytest)
-                # Update per_observation
-                if reports_each_observation(measure)
-                    if model_results.per_observation[i] === missing
-                        model_results.per_observation[i] = Vector{Any}(undef, nfolds)
-                    end
-                    model_results.per_observation[i][foldid] = loss
-                end
+                measurements = StatisticalMeasuresBase.measurements(measure, ypred, ytest)
+
+                # Update per observation:
+                model_results.per_observation[i][foldid] = measurements
 
                 # Update per_fold
-                model_results.per_fold[i][foldid] =
-                    reports_each_observation(measure) ?
-                    MLJBase.aggregate(loss, measure) : loss
+                model_results.per_fold[i][foldid] = measure(ypred, ytest)
             end
             index += 1
         end
     end
 
-    # Update measurement field by aggregation
+    # Update measurement field by aggregating per-fold measurements
     for modelname in modelnames
         for (i, measure) in enumerate(stack.measures)
             model_results = results[modelname]
+            mode = StatisticalMeasuresBase.external_aggregation_mode(measure)
             model_results.measurement[i] =
-                MLJBase.aggregate(model_results.per_fold[i], measure)
+                StatisticalMeasuresBase.aggregate(
+                    model_results.per_fold[i];
+                    mode,
+                    weights=fold_weights(mode),
+                )
         end
     end
 
diff --git a/src/composition/models/transformed_target_model.jl b/src/composition/models/transformed_target_model.jl
index 02fca5eb..7b72419a 100644
--- a/src/composition/models/transformed_target_model.jl
+++ b/src/composition/models/transformed_target_model.jl
@@ -140,14 +140,11 @@ tmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(
 function TransformedTargetModel(
     args...;
     model=nothing,
-    target=nothing,    # to be deprecated
-    transformer=target,  # then this should be `nothing`
+    transformer=nothing,
     inverse=nothing,
     cache=true,
 )
 
-    isnothing(target) ||
-        Base.depwarn(WARN_TARGET_DEPRECATED, :TransformedTargetModel, force=true)
     length(args) < 2 || throw(ERR_TOO_MANY_ARGUMENTS)
 
     if length(args) === 1
diff --git a/src/default_measures.jl b/src/default_measures.jl
new file mode 100644
index 00000000..2488bbf5
--- /dev/null
+++ b/src/default_measures.jl
@@ -0,0 +1,23 @@
+# # DEFAULT MEASURES
+
+"""
+    default_measure(model)
+
+Return a measure that should work with `model`, or return `nothing` if none can be
+reliably inferred.
+
+For Julia 1.9 and higher, `nothing` is returned, unless StatisticalMeasures.jl is
+loaded.
+
+# New implementations
+
+This method dispatches to `default_measure(model, observation_scitype)`, which has
+`nothing` as the fallback return value. Extend `default_measure` by overloading this
+version of the method. See for example the MLJBase.jl package extension,
+DefaultMeasuresExt.jl.
+
+"""
+default_measure(m) = nothing
+default_measure(m::Union{Supervised,Annotator}) =
+    default_measure(m, nonmissingtype(guess_model_target_observation_scitype(m)))
+default_measure(m, S) = nothing
diff --git a/src/machines.jl b/src/machines.jl
index 50544212..b6fabca6 100644
--- a/src/machines.jl
+++ b/src/machines.jl
@@ -682,7 +682,6 @@ function fit_only!(
                 @error "Problem fitting the machine $mach. "
                 _sources = sources(glb(mach.args...))
                 length(_sources) > 2 ||
-                    model isa Composite ||
                     all((!isempty).(_sources)) ||
                     @warn "Some learning network source nodes are empty. "
                 @info "Running type checks... "
diff --git a/src/measures/README.md b/src/measures/README.md
deleted file mode 100644
index 0097d2f7..00000000
--- a/src/measures/README.md
+++ /dev/null
@@ -1,117 +0,0 @@
-## Adding new measures
-
-This document assumes familiarity with the traits provided for
-measures. For a summary, query the docstring for
-`MLJBase.metadata_measures`.
-
-A measure is ordinarily called on data directly, as in
-
-```julia
-ŷ = rand(3) # predictions
-y = rand(3) # ground truth observations
-
-m = LPLoss(p=3)
-
-julia> m(ŷ, y)
-3-element Vector{Float64}:
- 0.07060087052171798
- 0.003020044780949528
- 0.019067038457889922
-```
-
-To call a measure without performing dimension or pool checks, one
-uses `MLJBase.call` instead:
-
-```julia
-MLJBase.call(m, ŷ, y)
-```
-
-A new measure reporting an aggregate measurement, such as
-`AreaUnderCurve`, will subtype `Aggregate`, and only needs to
-implement `call`. A measure that reports a measurement for each
-observation , such as `LPLoss`, subtypes `Unaggregated` and only needs
-to implement an evaluation method for single observations called
-`single`.
-
-Recall also that if a measure reports each observation, it does so
-even in the case that weights are additionally specified:
-
-```julia
-w = rand(3) # per-observation weights
-
-julia> m(ŷ, y, rand(3))
-3-element Vector{Float64}:
- 0.049333392516241206
- 0.0017612002314472718
- 0.003157450446692638
- ```
-
-This behaviour differs from other places where weights can only be
-specified as part of an aggregation of multi-observation measurements.
-
-
-### Unaggregated measures implement `single`
-
-To implement an `Unaggregated` measure, it suffices to implement `single(measure, η̂, η)`,
-which should return a measurement (e.g., a float) for a single example `(η̂, η)` (e.g., a
-pair of floats). There is no need for `single` to handle `missing` values. (Internally, a
-wrapper function `robust_single` handles these.)
-
-If only `single` is implemented, then the measure will automatically
-support per-observation weights and, where that makes sense, per-class
-weights. However, `supports_class_weights` may need to be overloaded,
-as this defaults to `false`.
-
-#### Special cases
-
-If `single` is *not* implemented, then `call(measure, ŷ, y)`, and optionally
-`call(measure, ŷ, y, w)`, must be implemented (the fallbacks call `robust_single`, a
-wrapped version of `single` that handles `missing` values).  In this case `y` and `ŷ` are
-arrays of matching size and the method should return an array of that size *without
-performing size or pool checks*. The method should handle `missing` and `NaN` values if
-possible, which should be propagated to relevant elements of the returned array.
-
-The `supports_weights` trait, which defaults to `true`, will need to
-be overloaded to return `false` if neither `single(::MyMeasure,
-args...)` nor `call(::MyMeasure, ŷ, y, w::AbstractArray)` are
-overloaded.
-
-### Aggregated measures implement `call`
-
-To implement an `Aggregated` measure, implement
-`call(measure::MyMeasure, ŷ, y)`. Optionally implement 
-`call(measure::MyMeasure, ŷ, y, w)`.
-
-
-### Trait declarations 
-
-Measure traits can be set using the `metadata_measure`
-function (query the doc-string) or individually, as in 
-
-```julia
-supports_weights(::Type{<:MyMeasure}) = false
-```
-
-Defaults are shown below
-
-trait                    | allowed values               | default 
--------------------------|------------------------------|--------------
-`target_scitype`         | some scientific type         | `Unknown`
-`human_name`             | any `String`                 | string version of type name
-`instances`              | any `Vector{String}`         | empty
-`prediction_type`        | `:deterministic`, `:probabilistic`, `:interval` `:unknown` | `:unknown`
-`orientation`            | `:score`, `:loss`, `:unknown`| `:unknown`
-`aggregation`            | `Mean()`, `Sum()`, `RootMeanSqaure()` | `Mean()`
-`supports_weights`       | `true` or `false`            | `true`
-`supports_class_weights` | `true` or `false`            | `false`
-`docstring`              | any `String`                 | includes `name`, `human_name` and `instances`
-`distribution_type`      | any `Distribution` subtype or `Unknown`   | `Unknown`
-
-### Exporting the measure and its aliases
-
-If you create a type alias, as in `const MAE = MeanAbsoluteValue`,
-then you must add this alias to the constant
-`MEASURE_TYPE_ALIASES`. That is the only step needed, as the the macro
-`@export_measures` programmatically exports all measure types and
-their instances, and those aliases listed in = MeanAbsoluteValue`,
-then you must add this alias to the constant `MEASURE_TYPE_ALIASES`.
diff --git a/src/measures/confusion_matrix.jl b/src/measures/confusion_matrix.jl
deleted file mode 100644
index fd35dd26..00000000
--- a/src/measures/confusion_matrix.jl
+++ /dev/null
@@ -1,273 +0,0 @@
-## CONFUSION MATRIX OBJECT
-
-"""
-    ConfusionMatrixObject{C}
-
-Confusion matrix with `C ≥ 2` classes. Rows correspond to predicted values
-and columns to the ground truth.
-"""
-struct ConfusionMatrixObject{C}
-    mat::Matrix
-    labels::Vector{String}
-end
-
-"""
-    ConfusionMatrixObject(m, labels)
-
-Instantiates a confusion matrix out of a square integer matrix `m`.
-Rows are the predicted class, columns the ground truth. See also the
-[wikipedia article](https://en.wikipedia.org/wiki/Confusion_matrix).
-
-"""
-function ConfusionMatrixObject(m::Matrix{Int}, labels::Vector{String})
-    s = size(m)
-    s[1] == s[2] || throw(ArgumentError("Expected a square matrix."))
-    s[1] > 1 || throw(ArgumentError("Expected a matrix of size ≥ 2x2."))
-    length(labels) == s[1] ||
-        throw(ArgumentError("As many labels as classes must be provided."))
-    ConfusionMatrixObject{s[1]}(m, labels)
-end
-
-# allow to access cm[i,j] but not set (it's immutable)
-Base.getindex(cm::ConfusionMatrixObject, inds...) = getindex(cm.mat, inds...)
-
-_levels(y1, y2) = vcat(levels(y1), levels(y2)) |> unique
-
-# simultaneous coercion of two vectors into categorical vectors having
-# the same pool:
-function _categorical(y1, y2)
-    L = _levels(y1, y2)
-    return categorical(y1, levels=L), categorical(y2, levels=L)
-end
-_categorical(y1::CategoricalArray{V1,N},
-             y2::CategoricalArray{V2,N}) where
-    {V, V1<:Union{Missing,V}, V2<:Union{Missing,V}, N} =
-    y1, y2
-_categorical(y1::AbstractArray{<:CategoricalArrays.CategoricalValue},
-             y2::AbstractArray{<:CategoricalArrays.CategoricalValue}) =
-    broadcast(identity, y1), broadcast(identity, y2)
-
-
-"""
-    _confmat(ŷ, y; rev=false)
-
-A private method. General users should use `confmat` or other instances
-of the measure type [`ConfusionMatrix`](@ref).
-
-Computes the confusion matrix given a predicted `ŷ` with categorical elements
-and the actual `y`. Rows are the predicted class, columns the ground truth.
-The ordering follows that of `levels(y)`.
-
-## Keywords
-
-* `rev=false`: in the binary case, this keyword allows to swap the ordering of
-               classes.
-* `perm=[]`:   in the general case, this keyword allows to specify a permutation
-               re-ordering the classes.
-* `warn=true`: whether to show a warning in case `y` does not have scientific
-               type `OrderedFactor{2}` (see note below).
-
-## Note
-
-To decrease the risk of unexpected errors, if `y` does not have
-scientific type `OrderedFactor{2}` (and so does not have a "natural
-ordering" negative-positive), a warning is shown indicating the
-current order unless the user explicitly specifies either `rev` or
-`perm` in which case it's assumed the user is aware of the class
-ordering.
-
-The `confusion_matrix` is a measure (although neither a score nor a
-loss) and so may be specified as such in calls to `evaluate`,
-`evaluate!`, although not in `TunedModel`s.  In this case, however,
-there no way to specify an ordering different from `levels(y)`, where
-`y` is the target.
-
-"""
-function _confmat(ŷraw::Union{Arr{V1,N}, CategoricalArray{V1,N}},
-                  yraw::Union{Arr{V2,N}, CategoricalArray{V2,N}};
-                  rev::Union{Nothing,Bool}=nothing,
-                  perm::Union{Nothing,Vector{<:Integer}}=nothing,
-                  warn::Bool=true) where
-    {V,V1<:Union{Missing,V}, V2<:Union{Missing,V},N}
-
-    # no-op if vectors already categorical arrays:
-    ŷ, y = _categorical(ŷraw, yraw)
-
-    levels_ = levels(y)
-    nc = length(levels_)
-    if rev !== nothing && rev && nc > 2
-        throw(ArgumentError("Keyword `rev` can only be used in binary case."))
-    end
-    if perm !== nothing && !isempty(perm)
-        length(perm) == nc ||
-            throw(ArgumentError("`perm` must be of length matching the "*
-                                "number of classes."))
-        Set(perm) == Set(collect(1:nc)) ||
-            throw(ArgumentError("`perm` must specify a valid permutation of "*
-                                "`[1, 2, ..., c]`, where `c` is "*
-                                "number of classes."))
-    end
-
-    # warning
-    if rev === nothing && perm === nothing
-        S = nonmissingtype(elscitype(y))
-        if warn
-            if nc==2 &&  !(S <: OrderedFactor)
-                @warn "The classes are un-ordered,\n" *
-                    "using: negative='$(levels_[1])' "*
-                    "and positive='$(levels_[2])'.\n" *
-                    "To suppress this warning, consider coercing "*
-                    "to OrderedFactor."
-            elseif !(S <: OrderedFactor)
-                @warn "The classes are un-ordered,\n" *
-                      "using order: $([l for l in levels_]).\n" *
-                      "To suppress this warning, consider "*
-                      "coercing to OrderedFactor."
-            end
-        end
-        rev  = false
-        perm = Int[]
-    elseif rev !== nothing && nc == 2
-        # rev takes precedence in binary case
-        if rev
-            perm = [2, 1]
-        else
-            perm = Int[]
-        end
-    end
-
-    # No permutation
-    if isempty(perm)
-        cmat = zeros(Int, nc, nc)
-        @inbounds for i in eachindex(y)
-            (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue
-            cmat[int(ŷ[i]), int(y[i])] += 1
-        end
-        return ConfusionMatrixObject(cmat, string.(levels_))
-    end
-
-    # With permutation
-    cmat = zeros(Int, nc, nc)
-    iperm = invperm(perm)
-    @inbounds for i in eachindex(y)
-        (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue
-        cmat[iperm[int(ŷ[i])], iperm[int(y[i])]] += 1
-    end
-    return ConfusionMatrixObject(cmat, string.(levels_[perm]))
-end
-
-
-# Machinery to display the confusion matrix in a non-confusing way
-# (provided the REPL is wide enough)
-
-splitw(w::Int) = (sp1 = div(w, 2); sp2 = w - sp1; (sp1, sp2))
-
-function Base.show(stream::IO, m::MIME"text/plain", cm::ConfusionMatrixObject{C}
-                   ) where C
-    width    = displaysize(stream)[2]
-    mincw    = ceil(Int, 12/C)
-    cw       = max(length(string(maximum(cm.mat))),maximum(length.(cm.labels)),mincw)
-    firstcw  = max(length(string(maximum(cm.mat))),maximum(length.(cm.labels)),9)
-    textlim  = 9
-    totalwidth = firstcw + cw * C + C + 2
-    width < totalwidth && (show(stream, m, cm.mat); return)
-
-    iob     = IOBuffer()
-    wline   = s -> write(iob, s * "\n")
-    splitcw = s -> (w = cw - length(s); splitw(w))
-    splitfirstcw = s -> (w = firstcw - length(s); splitw(w))
-    cropw   = s -> length(s) > textlim ? s[1:prevind(s, textlim)] * "…" : s
-
-    # 1.a top box
-    " "^(firstcw+1) * "┌" * "─"^((cw + 1) * C - 1) * "┐" |> wline
-    gt = "Ground Truth"
-    w  = (cw + 1) * C - 1 - length(gt)
-    sp1, sp2 = splitw(w)
-    " "^(firstcw+1) * "│" * " "^sp1 * gt * " "^sp2 * "│" |> wline
-    # 1.b separator
-    "┌" * "─"^firstcw * "┼" * ("─"^cw * "┬")^(C-1) * "─"^cw * "┤" |> wline
-    # 2.a description line
-    pr = "Predicted"
-    sp1, sp2 = splitfirstcw(pr)
-    partial = "│" * " "^sp1 * pr * " "^sp2 * "│"
-    for c in 1:C
-        # max = 10
-        s = cm.labels[c] |> cropw
-        sp1, sp2 = splitcw(s)
-        partial *= " "^sp1 * s * " "^sp2 * "│"
-    end
-    partial |> wline
-    # 2.b separating line
-    "├" * "─"^firstcw * "┼" * ("─"^cw * "┼")^(C-1) * ("─"^cw * "┤") |> wline
-    # 2.c line by line
-    for c in 1:C
-        # line
-        s  = cm.labels[c] |> cropw
-        sp1, sp2 = splitfirstcw(s)
-        partial = "│" * " "^sp1 * s * " "^sp2 * "│"
-        for r in 1:C
-            e = string(cm[c, r])
-            sp1, sp2 = splitcw(e)
-            partial *= " "^sp1 * e * " "^sp2 * "│"
-        end
-        partial |> wline
-        # separator
-        if c < C
-            "├" * "─"^firstcw * "┼" * ("─"^cw * "┼")^(C-1) * ("─"^cw * "┤") |> wline
-        end
-    end
-    # 2.d final line
-    "└" * "─"^firstcw * "┴" * ("─"^cw * "┴")^(C-1) * ("─"^cw * "┘") |> wline
-    write(stream, take!(iob))
-end
-
-
-## CONFUSION MATRIX AS MEASURE
-
-struct ConfusionMatrix <: Aggregated
-    perm::Union{Nothing,Vector{<:Integer}}
-end
-
-ConfusionMatrix(; perm=nothing) = ConfusionMatrix(perm)
-
-is_measure(::ConfusionMatrix) = true
-is_measure_type(::Type{ConfusionMatrix}) = true
-human_name(::Type{<:ConfusionMatrix}) = "confusion matrix"
-target_scitype(::Type{ConfusionMatrix}) =
-    Union{AbstractVector{<:Union{Missing,OrderedFactor}},
-          AbstractVector{<:Union{Missing,OrderedFactor}}}
-supports_weights(::Type{ConfusionMatrix}) = false
-prediction_type(::Type{ConfusionMatrix}) = :deterministic
-instances(::Type{<:ConfusionMatrix}) = ["confusion_matrix", "confmat"]
-orientation(::Type{ConfusionMatrix}) = :other
-reports_each_observation(::Type{ConfusionMatrix}) = false
-is_feature_dependent(::Type{ConfusionMatrix}) = false
-aggregation(::Type{ConfusionMatrix}) = Sum()
-
-@create_aliases ConfusionMatrix
-
-@create_docs(ConfusionMatrix,
-body=
-"""
-If `r` is the return value, then the raw confusion matrix is `r.mat`,
-whose rows correspond to predictions, and columns to ground truth.
-The ordering follows that of `levels(y)`.
-
-Use `ConfusionMatrix(perm=[2, 1])` to reverse the class order for binary
-data. For more than two classes, specify an appropriate permutation, as in
-`ConfusionMatrix(perm=[2, 3, 1])`.
-
-""",
-scitype=DOC_ORDERED_FACTOR_BINARY)
-
-# calling behaviour:
-call(m::ConfusionMatrix, ŷ, y) = _confmat(ŷ, y, perm=m.perm)
-
-# overloading addition to make aggregation work:
-Base.round(m::MLJBase.ConfusionMatrixObject; kws...) = m
-function Base.:+(m1::ConfusionMatrixObject, m2::ConfusionMatrixObject)
-    if m1.labels != m2.labels
-        throw(ArgumentError("Confusion matrix labels must agree"))
-    end
-    ConfusionMatrixObject(m1.mat + m2.mat, m1.labels)
-end
diff --git a/src/measures/continuous.jl b/src/measures/continuous.jl
deleted file mode 100644
index 33670216..00000000
--- a/src/measures/continuous.jl
+++ /dev/null
@@ -1,315 +0,0 @@
-const InfiniteArrMissing = Union{
-    AbstractArray{<:Union{Missing,Continuous}},
-    AbstractArray{<:Union{Missing,Count}}}
-
-# -----------------------------------------------------------
-# MeanAbsoluteError
-
-struct MeanAbsoluteError <: Aggregated end
-
-metadata_measure(MeanAbsoluteError;
-                 instances = ["mae", "mav", "mean_absolute_error",
-                              "mean_absolute_value"],
-                 target_scitype           = InfiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :loss),
-
-const MAE = MeanAbsoluteError
-const MAV = MeanAbsoluteError
-@create_aliases MeanAbsoluteError
-
-@create_docs(MeanAbsoluteError,
-body=
-"""
-``\\text{mean absolute error} =  n^{-1}∑ᵢ|yᵢ-ŷᵢ|`` or
-``\\text{mean absolute error} = n^{-1}∑ᵢwᵢ|yᵢ-ŷᵢ|``
-""",
-scitype=DOC_INFINITE)
-
-call(::MeanAbsoluteError, ŷ, y) = abs.(ŷ .- y) |> skipinvalid |> mean
-call(::MeanAbsoluteError, ŷ, y, w) = abs.(ŷ .- y) .* w |> skipinvalid |> mean
-
-# ----------------------------------------------------------------
-# RootMeanSquaredError
-
-struct RootMeanSquaredError <: Aggregated end
-
-metadata_measure(RootMeanSquaredError;
-                 instances                = ["rms", "rmse",
-                                             "root_mean_squared_error"],
-                 target_scitype           = InfiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :loss,
-                 aggregation              = RootMeanSquare())
-
-const RMS = RootMeanSquaredError
-@create_aliases RootMeanSquaredError
-
-@create_docs(RootMeanSquaredError,
-body=
-"""
-``\\text{root mean squared error} = \\sqrt{n^{-1}∑ᵢ|yᵢ-ŷᵢ|^2}`` or
-``\\text{root mean squared error} = \\sqrt{\\frac{∑ᵢwᵢ|yᵢ-ŷᵢ|^2}{∑ᵢwᵢ}}``
-""",
-scitype=DOC_INFINITE)
-
-call(::RootMeanSquaredError, ŷ, y) = (y .- ŷ).^2 |> skipinvalid |> mean |> sqrt
-call(::RootMeanSquaredError, ŷ, y, w) = (y .- ŷ).^2 .* w |> skipinvalid |> mean |> sqrt
-
-# -------------------------------------------------------------------------
-# R-squared (coefficient of determination)
-
-struct RSquared <: Aggregated end
-
-metadata_measure(RSquared;
-                 instances               = ["rsq", "rsquared"],
-                 target_scitype          = InfiniteArrMissing,
-                 prediction_type         = :deterministic,
-                 orientation             = :score,
-                 supports_weights        = false)
-
-const RSQ = RSquared
-@create_aliases RSquared
-
-@create_docs(RSquared,
-body=
-"""
-The R² (also known as R-squared or coefficient of determination) is suitable for
-interpreting linear regression analysis (Chicco et al., [2021](https://doi.org/10.7717/peerj-cs.623)).
-
-Let ``\\overline{y}`` denote the mean of ``y``, then
-
-``\\text{R^2} = 1 - \\frac{∑ (\\hat{y} - y)^2}{∑ \\overline{y} - y)^2}.``
-""",
-scitype=DOC_INFINITE)
-
-function call(::RSquared, ŷ, y)
-    num = (ŷ .- y).^2 |> skipinvalid |> sum
-    mean_y = mean(y)
-    denom = (mean_y .- y).^2 |> skipinvalid |> sum
-    return 1 - (num / denom)
-end
-
-# -------------------------------------------------------------------
-# LP
-
-struct LPLoss{T<:Real} <: Unaggregated
-    p::T
-end
-
-LPLoss(; p=2.0) = LPLoss(p)
-
-metadata_measure(LPLoss;
-                 instances = ["l1", "l2"],
-                 target_scitype           = InfiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :loss)
-
-const l1 = LPLoss(1)
-const l2 = LPLoss(2)
-
-@create_docs(LPLoss,
-body=
-"""
-Constructor signature: `LPLoss(p=2)`. Reports
-`|ŷ[i] - y[i]|^p` for every index `i`.
-""",
-scitype=DOC_INFINITE)
-
-single(m::LPLoss, ŷ, y) =  abs(y - ŷ)^(m.p)
-
-# ----------------------------------------------------------------------------
-# RootMeanSquaredLogError
-
-struct RootMeanSquaredLogError <: Aggregated end
-
-metadata_measure(RootMeanSquaredLogError;
-                 instances = ["rmsl", "rmsle", "root_mean_squared_log_error"],
-                 target_scitype           = InfiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :loss,
-                 aggregation              = RootMeanSquare())
-
-const RMSL = RootMeanSquaredLogError
-@create_aliases RootMeanSquaredLogError
-
-@create_docs(RootMeanSquaredLogError,
-body=
-"""
-``\\text{root mean squared log error} =
-\\sqrt{n^{-1}∑ᵢ\\log\\left({yᵢ \\over ŷᵢ}\\right)^2}``
-""",
-footer="See also [`rmslp1`](@ref).",
-scitype=DOC_INFINITE)
-
-call(::RootMeanSquaredLogError, ŷ, y) =
-    (log.(y) - log.(ŷ)).^2 |> skipinvalid |> mean |> sqrt
-call(::RootMeanSquaredLogError, ŷ, y, w) =
-    (log.(y) - log.(ŷ)).^2 .* w |> skipinvalid |> mean |> sqrt
-
-# ---------------------------------------------------------------------------
-#  RootMeanSquaredLogProportionalError
-
-struct RootMeanSquaredLogProportionalError{T<:Real} <: Aggregated
-    offset::T
-end
-
-RootMeanSquaredLogProportionalError(; offset=1.0) =
-    RootMeanSquaredLogProportionalError(offset)
-
-metadata_measure(RootMeanSquaredLogProportionalError;
-                 instances                = ["rmslp1", ],
-                 target_scitype           = InfiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :loss,
-                 aggregation              = RootMeanSquare())
-
-const RMSLP = RootMeanSquaredLogProportionalError
-@create_aliases RootMeanSquaredLogProportionalError
-
-@create_docs(RootMeanSquaredLogProportionalError,
-body=
-"""
-Constructor signature: `RootMeanSquaredLogProportionalError(; offset = 1.0)`.
-
-``\\text{root mean squared log proportional error} =
-\\sqrt{n^{-1}∑ᵢ\\log\\left({yᵢ + \\text{offset} \\over ŷᵢ + \\text{offset}}\\right)}``
-""",
-footer="See also [`rmsl`](@ref). ",
-scitype=DOC_INFINITE)
-
-call(m::RMSLP, ŷ, y) =
-    (log.(y .+ m.offset) - log.(ŷ .+ m.offset)).^2 |>
-    skipinvalid |> mean |> sqrt
-
-call(m::RMSLP, ŷ, y, w) =
-    (log.(y .+ m.offset) - log.(ŷ .+ m.offset)).^2 .* w |>
-    skipinvalid |> mean |> sqrt
-
-# --------------------------------------------------------------------------
-# RootMeanSquaredProportionalError
-
-struct RootMeanSquaredProportionalError{T<:Real} <: Aggregated
-    tol::T
-end
-
-RootMeanSquaredProportionalError(; tol=eps()) =
-    RootMeanSquaredProportionalError(tol)
-
-metadata_measure(RootMeanSquaredProportionalError;
-    instances                = ["rmsp", ],
-    target_scitype           = InfiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :loss,
-    aggregation              = RootMeanSquare())
-
-const RMSP = RootMeanSquaredProportionalError
-@create_aliases RMSP
-
-@create_docs(RootMeanSquaredProportionalError,
-body=
-"""
-Constructor keyword arguments: `tol` (default = `eps()`).
-
-``\\text{root mean squared proportional error} =
-\\sqrt{m^{-1}∑ᵢ \\left({yᵢ-ŷᵢ \\over yᵢ}\\right)^2}``
-
-where the sum is over indices such that `abs(yᵢ) > tol` and `m` is the number
-of such indices.
-
-""", scitype=DOC_INFINITE)
-
-function call(
-    m::RootMeanSquaredProportionalError,
-    ŷ,
-    y,
-    w=nothing,
-    )
-    ret = 0
-    count = 0
-    @inbounds for i in eachindex(y)
-        (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue
-        ayi = abs(y[i])
-        if ayi > m.tol
-            dev = ((y[i] - ŷ[i]) / ayi)^2
-            ret += dev
-            ret = _scale(ret, w, i)
-            count += 1
-        end
-    end
-    return sqrt(ret / count)
-end
-
-# -----------------------------------------------------------------------
-# MeanAbsoluteProportionalError
-
-struct MeanAbsoluteProportionalError{T} <: Aggregated
-    tol::T
-end
-
-MeanAbsoluteProportionalError(; tol=eps()) = MeanAbsoluteProportionalError(tol)
-
-metadata_measure(MeanAbsoluteProportionalError;
-    instances                = ["mape", ],
-    target_scitype           = InfiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :loss)
-
-const MAPE = MeanAbsoluteProportionalError
-@create_aliases MAPE
-
-@create_docs(MeanAbsoluteProportionalError,
-body=
-"""
-Constructor key-word arguments: `tol` (default = `eps()`).
-
-``\\text{mean absolute proportional error} =  m^{-1}∑ᵢ|{(yᵢ-ŷᵢ) \\over yᵢ}|``
-
-where the sum is over indices such that `abs(yᵢ) > tol` and `m` is the number
-of such indices.
-""", scitype=DOC_INFINITE)
-
-function call(
-    m::MeanAbsoluteProportionalError,
-    ŷ,
-    y,
-    w=nothing,
-    )
-    ret = 0
-    count = 0
-    @inbounds for i in eachindex(y)
-        (isinvalid(y[i]) || isinvalid(ŷ[i])) && continue
-        ayi = abs(y[i])
-        if ayi > m.tol
-        #if y[i] != zero(eltype(y))
-            dev = abs((y[i] - ŷ[i]) / ayi)
-            ret += dev
-            ret =_scale(ret, w, i)
-            count += 1
-        end
-    end
-    return ret / count
-end
-
-# -------------------------------------------------------------------------
-# LogCoshLoss
-
-struct LogCoshLoss <: Unaggregated end
-
-metadata_measure(LogCoshLoss;
-    instances                = ["log_cosh", "log_cosh_loss"],
-    target_scitype           = InfiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :loss)
-
-const LogCosh = LogCoshLoss
-@create_aliases LogCoshLoss
-
-@create_docs(LogCoshLoss,
-             body="Reports ``\\log(\\cosh(ŷᵢ-yᵢ))`` for each index `i`. ",
-             scitype=DOC_INFINITE)
-
-_softplus(x::T) where T<:Real = x > zero(T) ? x + log1p(exp(-x)) : log1p(exp(x))
-_log_cosh(x::T) where T<:Real = x + _softplus(-2x) - log(convert(T, 2))
-
-single(::LogCoshLoss, ŷ, y) = _log_cosh(ŷ - y)
diff --git a/src/measures/doc_strings.jl b/src/measures/doc_strings.jl
deleted file mode 100644
index 03ed76df..00000000
--- a/src/measures/doc_strings.jl
+++ /dev/null
@@ -1,12 +0,0 @@
-# the following creates doc-strings for the aliases (`instances`) of each measure:
-
-for m in measures()
-    name = m.name
-    for instance in m.instances
-        alias = Symbol(instance)
-        quote
-            @doc "An instance of type [`$($name)`](@ref). "*
-                "Query the [`$($name)`](@ref) doc-string for details. " $alias
-        end |> eval
-    end
-end
diff --git a/src/measures/finite.jl b/src/measures/finite.jl
deleted file mode 100644
index 908525ab..00000000
--- a/src/measures/finite.jl
+++ /dev/null
@@ -1,1247 +0,0 @@
-const FiniteArrMissing{N} = Union{
-    AbstractArray{<:Union{Missing,Multiclass{N}}},
-    AbstractArray{<:Union{Missing,OrderedFactor{N}}}}
-
-# ---------------------------------------------------
-# misclassification rate
-
-struct MisclassificationRate <: Aggregated end
-
-metadata_measure(MisclassificationRate;
-                 instances  = ["misclassification_rate", "mcr"],
-                 target_scitype           = FiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :loss)
-
-const MCR = MisclassificationRate
-@create_aliases MCR
-
-@create_docs(MisclassificationRate,
-body=
-"""
-A confusion matrix can also be passed as argument.
-$INVARIANT_LABEL
-""",
-scitype=DOC_FINITE)
-
-# calling behaviour:
-call(::MCR, ŷ, y) = (y .!= ŷ) |> Mean()
-call(::MCR, ŷ, y, w) = (y .!= ŷ) .* w |> Mean()
-(::MCR)(cm::ConfusionMatrixObject) = 1.0 - sum(diag(cm.mat)) / sum(cm.mat)
-
-# -------------------------------------------------------------
-# accuracy
-
-struct Accuracy <: Aggregated end
-
-metadata_measure(Accuracy;
-                 instances = ["accuracy",],
-                 target_scitype           = FiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :score)
-
-@create_aliases Accuracy
-
-@create_docs(Accuracy,
-body=
-"""
-Accuracy is proportion of correct predictions `ŷ[i]` that match the
-ground truth `y[i]` observations. $INVARIANT_LABEL
-""",
-scitype=DOC_FINITE)
-
-# calling behaviour:
-call(::Accuracy, args...) = 1.0 - call(misclassification_rate, args...)
-(::Accuracy)(m::ConfusionMatrixObject) = sum(diag(m.mat)) / sum(m.mat)
-
-# -----------------------------------------------------------
-# balanced accuracy
-
-struct BalancedAccuracy <: Aggregated
-    adjusted::Bool
-end
-BalancedAccuracy(; adjusted=false) = BalancedAccuracy(adjusted)
-
-metadata_measure(BalancedAccuracy;
-                 instances = ["balanced_accuracy", "bacc", "bac"],
-                 target_scitype           = FiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :score)
-
-const BACC = BalancedAccuracy
-@create_aliases BACC
-
-@create_docs(BalancedAccuracy,
-body=
-"""
-Balanced accuracy compensates standard [`Accuracy`](@ref) for class imbalance.
-See [https://en.wikipedia.org/wiki/Precision_and_recall#Imbalanced_data](https://en.wikipedia.org/wiki/Precision_and_recall#Imbalanced_data). 
-
-Setting `adjusted=true` rescales the score in the way prescribed in
-[L. Mosley (2013): A balanced approach to the multi-class imbalance
-problem. PhD thesis, Iowa State
-University](https://lib.dr.iastate.edu/etd/13537/). In the binary
-case, the adjusted balanced accuracy is also known as *Youden’s J
-statistic*, or *informedness*.
-
-$INVARIANT_LABEL
-""",
-scitype=DOC_FINITE)
-
-function call(m::BACC, ŷm, ym, wm=nothing)
-
-    ŷ, y, w = _skipinvalid(ŷm, ym, wm)
-
-    if w === nothing
-        n_given_class = StatsBase.countmap(y)
-        freq(i) = @inbounds n_given_class[y[i]]
-        ŵ = 1 ./ freq.(eachindex(y))
-    else # following sklearn, which is non-linear
-        ŵ = similar(w)
-        @inbounds for i in eachindex(w)
-            ŵ[i] = w[i] / sum(w .* (y .== y[i]))
-        end
-    end
-    s = sum(ŵ)
-    score = sum((ŷ .== y) .* ŵ) / sum(ŵ)
-    if m.adjusted
-        n_classes = length(levels(y))
-        chance = 1 / n_classes
-        score -= chance
-        score /= 1 - chance
-    end
-    return score
-end
-
-# ---------------------------------------------------
-# kappa
-
-struct Kappa <: Aggregated end
-
-metadata_measure(Kappa;
-                 instances  = ["kappa"],
-                 target_scitype           = FiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :score,
-                 supports_weights         = false)
-
-@create_aliases Kappa
-
-@create_docs(Kappa,
-body=
-"""
-A metric to measure agreement between predicted labels and the ground truth. 
-See [https://en.wikipedia.org/wiki/Cohen%27s_kappa](https://en.wikipedia.org/wiki/Cohen%27s_kappa)
-
-$INVARIANT_LABEL
-""",
-scitype=DOC_FINITE)
-
-# calling behaviour:
-function (::Kappa)(cm::ConfusionMatrixObject{C}) where C
-    # relative observed agreement - same as accuracy
-    p₀ = sum(diag(cm.mat))/sum(cm.mat)
-
-    # probability of agreement due to chance - for each class cᵢ, this
-    # would be: (#predicted=cᵢ)/(#instances) x (#observed=cᵢ)/(#instances)
-    rows_sum = sum!(similar(cm.mat, 1, C), cm.mat) # 1 x C matrix
-    cols_sum = sum!(similar(cm.mat, C, 1), cm.mat) # C X 1 matrix
-    pₑ = first(rows_sum*cols_sum)/sum(rows_sum)^2
-
-    # Kappa calculation
-    κ = (p₀ - pₑ)/(1 - pₑ)
-
-    return κ
-end
-
-call(k::Kappa, ŷ, y) = _confmat(ŷ, y, warn=false) |> k
-
-
-# ==================================================================
-## DETERMINISTIC BINARY PREDICTIONS - ORDER-INDEPENDENT
-
-# ------------------------------------------------------------------
-# Matthew's correlation
-
-struct MatthewsCorrelation <: Aggregated end
-
-metadata_measure(MatthewsCorrelation;
-                 instances = ["matthews_correlation", "mcc"],
-                 target_scitype           = FiniteArrMissing{2},
-                 prediction_type          = :deterministic,
-                 orientation              = :score,
-                 supports_weights         = false)
-const MCC = MatthewsCorrelation
-@create_aliases MCC
-
-@create_docs(MatthewsCorrelation,
-body=
-"""
-[https://en.wikipedia.org/wiki/Matthews_correlation_coefficient](https://en.wikipedia.org/wiki/Matthews_correlation_coefficient)
-$INVARIANT_LABEL
-""",
-scitype=DOC_FINITE_BINARY)
-
-# calling behaviour:
-function (::MCC)(cm::ConfusionMatrixObject{C}) where C
-    # http://rk.kvl.dk/introduction/index.html
-    # NOTE: this is O(C^3), there may be a clever way to
-    # speed this up though in general this is only used for low  C
-    num = 0
-    @inbounds for k in 1:C, l in 1:C, m in 1:C
-        num += cm[k,k] * cm[l,m] - cm[k,l] * cm[m,k]
-    end
-    den1 = 0
-    den2 = 0
-    @inbounds for k in 1:C
-        a = sum(cm[k, :])
-        b = sum(cm[setdiff(1:C, k), :])
-        den1 += a * b
-        a = sum(cm[:, k])
-        b = sum(cm[:, setdiff(1:C, k)])
-        den2 += a * b
-    end
-    mcc = num / sqrt(float(den1) * float(den2))
-
-    isnan(mcc) && return 0
-    return mcc
-end
-
-call(m::MCC, ŷ, y) = _confmat(ŷ, y, warn=false) |> m
-
-
-# ==========================================================================
-# DETERMINISTIC BINARY PREDICTIONS - ORDER DEPENDENT
-
-const CM2 = ConfusionMatrixObject{2}
-
-# --------------------------------------------------------------------------
-# FScore
-
-struct FScore{T<:Real} <: Aggregated
-    β::T
-    rev::Union{Nothing,Bool}
-end
-
-FScore(; β=1.0, rev=nothing) = FScore(β, rev)
-
-metadata_measure(FScore;
-                 human_name = "F-Score",
-                 instances = ["f1score",],
-                 target_scitype           = FiniteArrMissing{2},
-                 prediction_type          = :deterministic,
-                 orientation              = :score,
-                 supports_weights         = false)
-
-@create_aliases FScore
-
-@create_docs(FScore,
-body=
-"""
-This is the one-parameter generalization, ``F_β``, of the F-measure or
-balanced F-score.
-
-[https://en.wikipedia.org/wiki/F1_score](https://en.wikipedia.org/wiki/F1_score)
-
-Constructor signature: `FScore(; β=1.0, rev=true)`.
-
-By default, the second element of `levels(y)` is designated as
-`true`. To reverse roles, specify `rev=true`.
-""",
-scitype=DOC_ORDERED_FACTOR_BINARY,
-footer="Constructor signature: `FScore(β=1.0, rev=false)`. ")
-
-# calling on conf matrix:
-function (score::FScore)(m::CM2)
-    β = score.β
-    β2   = β^2
-    tp = _tp(m)
-    fn = _fn(m)
-    fp = _fp(m)
-    return (1 + β2) * tp / ((1 + β2)*tp + β2*fn + fp)
-end
-
-# calling on arrays:
-call(m::FScore, ŷ, y) = _confmat(ŷ, y; rev=m.rev) |> m
-
-# -------------------------------------------------------------------------
-# TruePositive and its cousins - struct and metadata declerations
-
-const TRUE_POSITIVE_AND_COUSINS =
-    (:TruePositive, :TrueNegative, :FalsePositive, :FalseNegative,
-     :TruePositiveRate, :TrueNegativeRate, :FalsePositiveRate,
-     :FalseNegativeRate, :FalseDiscoveryRate, :Precision,
-     :NegativePredictiveValue)
-
-for M in TRUE_POSITIVE_AND_COUSINS
-    ex = quote
-        struct $M <: Aggregated rev::Union{Nothing,Bool} end
-        $M(; rev=nothing) = $M(rev)
-    end
-    eval(ex)
-end
-
-metadata_measure.((FalsePositive, FalseNegative);
-    target_scitype           = FiniteArrMissing{2},
-    prediction_type          = :deterministic,
-    orientation              = :loss,
-    aggregation              = Sum(),
-    supports_weights         = false)
-
-metadata_measure.((FalsePositiveRate, FalseNegativeRate, FalseDiscoveryRate);
-    target_scitype           = FiniteArrMissing{2},
-    prediction_type          = :deterministic,
-    orientation              = :loss,
-    supports_weights         = false)
-
-metadata_measure.((TruePositive, TrueNegative);
-    target_scitype           = FiniteArrMissing{2},
-    prediction_type          = :deterministic,
-    orientation              = :score,
-    aggregation              = Sum(),
-    supports_weights         = false)
-
-metadata_measure.((TruePositiveRate, TrueNegativeRate, Precision,
-                   NegativePredictiveValue);
-    target_scitype           = FiniteArrMissing{2},
-    prediction_type          = :deterministic,
-    orientation              = :score,
-    supports_weights         = false)
-
-# adjustments:
-instances(::Type{<:TruePositive}) = ["true_positive", "truepositive"]
-human_name(::Type{<:TruePositive})  = "number of true positives"
-
-instances(::Type{<:TrueNegative}) = ["true_negative", "truenegative"]
-human_name(::Type{<:TrueNegative}) = "number of true negatives"
-
-instances(::Type{<:FalsePositive}) = ["false_positive", "falsepositive"]
-human_name(::Type{<:FalsePositive}) = "number of false positives"
-
-instances(::Type{<:FalseNegative}) = ["false_negative", "falsenegative"]
-human_name(::Type{<:FalseNegative}) = "number of false negatives"
-
-instances(::Type{<:TruePositiveRate}) =
-    ["true_positive_rate", "truepositive_rate",
-     "tpr", "sensitivity", "recall", "hit_rate"]
-human_name(::Type{<:TruePositiveRate}) =
-    "true positive rate (a.k.a recall)"
-
-instances(::Type{<:TrueNegativeRate}) =
-    ["true_negative_rate", "truenegative_rate", "tnr",
-     "specificity", "selectivity"]
-
-instances(::Type{<:FalsePositiveRate}) =
-    ["false_positive_rate", "falsepositive_rate",
-     "fpr", "fallout"]
-                               "."
-instances(::Type{<:FalseNegativeRate}) =
-    ["false_negative_rate", "falsenegative_rate", "fnr", "miss_rate"]
-                               "."
-instances(::Type{<:FalseDiscoveryRate}) =
-    ["false_discovery_rate", "falsediscovery_rate", "fdr"]
-
-instances(::Type{<:NegativePredictiveValue}) =
-    ["negative_predictive_value", "negativepredictive_value", "npv"]
-
-instances(::Type{<:Precision}) =
-    ["positive_predictive_value", "ppv", "positivepredictive_value", "precision"]
-human_name(::Type{<:Precision}) =
-    "precision (a.k.a. positive predictive value)"
-
-
-# ---------------------------------------------------------------------
-# TruePositive and its cousins - doc-string building and alias creation
-
-for M in TRUE_POSITIVE_AND_COUSINS
-    eval(quote
-         $M == Precision || @create_aliases $M # precision handled separately
-
-         @create_docs($M,
-         body=
-         """
-         Assigns `false` to first element of `levels(y)`. To reverse roles,
-         use `$(name($M))(rev=true)`.
-         """,
-         scitype=DOC_ORDERED_FACTOR_BINARY)
-         end)
-end
-
-# type aliases:
-const TNR = TrueNegativeRate
-const Specificity = TrueNegativeRate
-const TPR = TruePositiveRate
-const Recall = TPR
-const FPR = FalsePositiveRate
-const FNR = FalseNegativeRate
-const FDR = FalseDiscoveryRate
-const NPV = NegativePredictiveValue
-const PPV = Precision
-
-# special case of precision; cannot generate alias's automatically due
-# to conflict with Base.precision:
-const positive_predictive_value = Precision()
-const ppv = Precision()
-const positivepredictive_value = Precision()
-
-# ----------------------------------------------------------------------
-# TruePositive and its cousins - helper functions for confusion matrices
-
-_tp(m::CM2) = m[2,2]
-_tn(m::CM2) = m[1,1]
-_fp(m::CM2) = m[2,1]
-_fn(m::CM2) = m[1,2]
-
-_tpr(m::CM2) = _tp(m) / (_tp(m) + _fn(m))
-_tnr(m::CM2) = _tn(m) / (_tn(m) + _fp(m))
-_fpr(m::CM2) = 1 - _tnr(m)
-_fnr(m::CM2) = 1 - _tpr(m)
-
-_fdr(m::CM2) = _fp(m) / (_tp(m) + _fp(m))
-_npv(m::CM2) = _tn(m) / (_tn(m) + _fn(m))
-
-# ----------------------------------------------------------------------
-# TruePositive and its cousins - calling behaviour
-
-# NOTE: here we assume the CM was constructed a priori with the
-# proper ordering so the field `rev` in the measure is ignored
-
-# on confusion matrices:
-(::TruePositive)(m::CM2)  = _tp(m)
-(::TrueNegative)(m::CM2)  = _tn(m)
-(::FalsePositive)(m::CM2) = _fp(m)
-(::FalseNegative)(m::CM2) = _fn(m)
-(::TPR)(m::CM2) = _tpr(m)
-(::TNR)(m::CM2) = _tnr(m)
-(::FPR)(m::CM2) = _fpr(m)
-(::FNR)(m::CM2) = _fnr(m)
-(::FDR)(m::CM2) = _fdr(m)
-(::NPV)(m::CM2) = _npv(m)
-(::Precision)(m::CM2) = 1.0 - _fdr(m)
-
-# on arrays (ŷ, y):
-for M_ex in TRUE_POSITIVE_AND_COUSINS
-    @eval call(m::$M_ex, ŷ, y) = _confmat(ŷ, y; rev=m.rev) |> m
-end
-
-# since Base.precision exists (as single argument function) we
-# manually overload Base.precision:
-Base.precision(m::CM2) = m |> Precision()
-function Base.precision(ŷ, y)
-    _check(Precision(), ŷ, y)
-    call(Precision(), ŷ, y)
-end
-
-
-# =================================================================
-# MULTICLASS AND ORDER INDEPENDENT
-
-const CM = ConfusionMatrixObject{N} where N
-
-abstract type MulticlassAvg end
-struct MacroAvg <: MulticlassAvg end
-struct MicroAvg <: MulticlassAvg end
-struct NoAvg <: MulticlassAvg end
-
-const macro_avg = MacroAvg()
-const micro_avg = MicroAvg()
-const no_avg    = NoAvg()
-
-const DS_AVG_RET = "Options for `average` are: `no_avg`, `macro_avg` "*
-    "(default) and `micro_avg`. Options for `return_type`, "*
-    "applying in the `no_avg` case, are: `LittleDict` (default) or "*
-    "`Vector`. "
-
-const DS_RET = "Options for `return_type` are: "*
-    "`LittleDict`(default) or "*
-    "`Vector`. "
-
-const CLASS_W = "An optional `AbstractDict`, denoted `class_w` above, "*
-    "keyed on `levels(y)`, specifies class weights. It applies if "*
-    "`average=macro_avg` or `average=no_avg`."
-
-"""
-    MulticlassFScore(; β=1.0, average=macro_avg, return_type=LittleDict)
-
-One-parameter generalization, ``F_β``, of the F-measure or balanced F-score for
-multiclass observations.
-
-    MulticlassFScore()(ŷ, y)
-    MulticlassFScore()(ŷ, y, class_w)
-
-Evaluate the default score on multiclass observations, `ŷ`, given
-ground truth values, `y`. $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassFScore)`.
-
-"""
-struct MulticlassFScore{T<:Real,
-                        M<:MulticlassAvg,
-                        U<:Union{Vector, LittleDict}} <:Aggregated
-    β::T
-    average::M
-    return_type::Type{U}
-end
-
-MulticlassFScore(; β=1.0, average=macro_avg, return_type=LittleDict) =
-    MulticlassFScore(β, average, return_type)
-
-metadata_measure(MulticlassFScore;
-                 instances = ["macro_f1score", "micro_f1score",
-                              "multiclass_f1score"],
-                 target_scitype           = FiniteArrMissing,
-                 prediction_type          = :deterministic,
-                 orientation              = :score,
-                 supports_weights         = false,
-                 supports_class_weights   = true)
-
-MLJModelInterface.docstring(::Type{<:MulticlassFScore}) =
-    "Multiclass F_β score; aliases: " *
-    "`macro_f1score=MulticlassFScore()`, "*
-    "`multiclass_f1score=MulticlassFScore()` " *
-    "`micro_f1score=MulticlassFScore(average=micro_avg)`."
-
-const micro_f1score      = MulticlassFScore(average=micro_avg)
-const macro_f1score      = MulticlassFScore(average=macro_avg)
-const multiclass_f1score = MulticlassFScore(average=macro_avg)
-
-for M in (:MulticlassTruePositive, :MulticlassTrueNegative,
-          :MulticlassFalsePositive, :MulticlassFalseNegative)
-    ex = quote
-        struct $M{U<:Union{Vector, LittleDict}} <: Aggregated
-            return_type::Type{U}
-        end
-#        $M(return_type::Type{U}) where {U} = $M(return_type)
-        $M(; return_type=LittleDict) = $M(return_type)
-    end
-    eval(ex)
-end
-
-const _mtp_vec = MulticlassTruePositive(return_type=Vector)
-const _mfn_vec = MulticlassFalseNegative(return_type=Vector)
-const _mfp_vec = MulticlassFalsePositive(return_type=Vector)
-const _mtn_vec = MulticlassTrueNegative(return_type=Vector)
-
-for M in (:MulticlassTruePositiveRate, :MulticlassTrueNegativeRate,
-          :MulticlassFalsePositiveRate, :MulticlassFalseNegativeRate,
-          :MulticlassFalseDiscoveryRate, :MulticlassPrecision,
-          :MulticlassNegativePredictiveValue)
-    ex = quote
-        struct $M{T<:MulticlassAvg, U<:Union{Vector, LittleDict}} <: Aggregated
-            average::T
-            return_type::Type{U}
-        end
-        $M(; average=macro_avg, return_type=LittleDict) = $M(average, return_type)
-    end
-    eval(ex)
-end
-
-metadata_measure.((MulticlassFalsePositive, MulticlassFalseNegative);
-    target_scitype           = FiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :loss,
-    aggregation               = Sum(),
-    is_feature_dependent     = false,
-    supports_weights         = false,
-    supports_class_weights   = false)
-
-metadata_measure.((MulticlassFalsePositiveRate, MulticlassFalseNegativeRate,
-                   MulticlassFalseDiscoveryRate);
-    target_scitype           = FiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :loss,
-    is_feature_dependent     = false,
-    supports_weights         = false,
-    supports_class_weights   = true)
-
-metadata_measure.((MulticlassTruePositive, MulticlassTrueNegative);
-    target_scitype           = FiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :score,
-    aggregation              = Sum(),
-    is_feature_dependent     = false,
-    supports_weights         = false,
-    supports_class_weights   = false)
-
-metadata_measure.((MulticlassTrueNegativeRate, MulticlassNegativePredictiveValue);
-    target_scitype           = FiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :score,
-    is_feature_dependent     = false,
-    supports_weights         = false,
-    supports_class_weights   = true)
-
-metadata_measure.((MulticlassTruePositiveRate, MulticlassPrecision);
-    target_scitype           = FiniteArrMissing,
-    prediction_type          = :deterministic,
-    orientation              = :score,
-    is_feature_dependent     = false,
-    supports_weights         = false,
-    supports_class_weights   = true)
-
-MMI.docstring(::Type{<:MulticlassTruePositive})  =
-    "Number of true positives; " *
-    "aliases: `multiclass_true_positive`, `multiclass_truepositive`."
-instances(::Type{<:MulticlassTruePositive})  =
-    ["multiclass_true_positive", "multiclass_truepositive"]
-MMI.docstring(::Type{<:MulticlassTrueNegative})  =
-    "Number of true negatives; " *
-    "aliases: `multiclass_true_negative`, `multiclass_truenegative`."
-instances(::Type{<:MulticlassTrueNegative})  =
-    ["multiclass_true_negative", "multiclass_truenegative"]
-MMI.docstring(::Type{<:MulticlassFalsePositive}) =
-    "Number of false positives; " *
-    "aliases: `multiclass_false_positive`, `multiclass_falsepositive`."
-instances(::Type{<:MulticlassFalsePositive}) =
-    ["multiclass_false_positive", "multiclass_falsepositive"]
-MMI.docstring(::Type{<:MulticlassFalseNegative}) =
-    "Number of false negatives; " *
-    "aliases: `multiclass_false_negative`, `multiclass_falsenegative`."
-instances(::Type{<:MulticlassFalseNegative}) =
-    ["multiclass_false_negative", "multiclass_falsenegative"]
-
-MMI.docstring(::Type{<:MulticlassTruePositiveRate}) =
-    "multiclass true positive rate; aliases: " *
-    "`multiclass_true_positive_rate`, `multiclass_tpr`, " *
-    "`multiclass_sensitivity`, `multiclass_recall`, " *
-    "`multiclass_hit_rate`, `multiclass_truepositive_rate`, "
-instances(::Type{<:MulticlassTruePositiveRate}) =
-    ["multiclass_true_positive_rate", "multiclass_tpr",
-    "multiclass_sensitivity", "multiclass_recall",
-    "multiclass_hit_rate", "multiclass_truepositive_rate"]
-MMI.docstring(::Type{<:MulticlassTrueNegativeRate}) =
-    "multiclass true negative rate; aliases: " *
-    "`multiclass_true_negative_rate`, `multiclass_tnr` " *
-    " `multiclass_specificity`, `multiclass_selectivity`, " *
-    "`multiclass_truenegative_rate`."
-instances(::Type{<:MulticlassTrueNegativeRate}) =
-    ["multiclass_true_negative_rate", "multiclass_tnr",
-    "multiclass_specificity", "multiclass_selectivity",
-    "multiclass_truenegative_rate"]
-MMI.docstring(::Type{<:MulticlassFalsePositiveRate}) =
-                       "multiclass false positive rate; aliases: " *
-                       "`multiclass_false_positive_rate`, `multiclass_fpr` " *
-                       "`multiclass_fallout`, `multiclass_falsepositive_rate`."
-instances(::Type{<:MulticlassFalsePositiveRate}) =
-    ["multiclass_false_positive_rate", "multiclass_fpr",
-     "multiclass_fallout", "multiclass_falsepositive_rate"]
-MMI.docstring(::Type{<:MulticlassFalseNegativeRate}) =
-    "multiclass false negative rate; aliases: " *
-    "`multiclass_false_negative_rate`, `multiclass_fnr`, " *
-    "`multiclass_miss_rate`, `multiclass_falsenegative_rate`."
-instances(::Type{<:MulticlassFalseNegativeRate}) =
-    ["multiclass_false_negative_rate", "multiclass_fnr",
-    "multiclass_miss_rate", "multiclass_falsenegative_rate"]
-MMI.docstring(::Type{<:MulticlassFalseDiscoveryRate}) =
-    "multiclass false discovery rate; "*
-    "aliases: `multiclass_false_discovery_rate`, " *
-    "`multiclass_falsediscovery_rate`, `multiclass_fdr`."
-instances(::Type{<:MulticlassFalseDiscoveryRate}) =
-    ["multiclass_falsediscovery_rate", "multiclass_fdr",
-     "multiclass_false_discovery_rate"]
-MMI.docstring(::Type{<:MulticlassNegativePredictiveValue}) =
-    "multiclass negative predictive value; aliases: " *
-    "`multiclass_negative_predictive_value`, " *
-    "`multiclass_negativepredictive_value`, `multiclass_npv`."
-instances(::Type{<:MulticlassNegativePredictiveValue}) =
-    ["multiclass_negative_predictive_value",
-    "multiclass_negativepredictive_value", "multiclass_npv"]
-MMI.docstring(::Type{<:MulticlassPrecision}) =
-  "multiclass positive predictive value (aka precision);"*
-  " aliases: `multiclass_positive_predictive_value`, `multiclass_ppv`, " *
-  "`multiclass_positivepredictive_value`, " *
-  "`multiclass_precision`."
-instances(::Type{<:MulticlassPrecision}) =
-    ["multiclass_positive_predictive_value", "multiclass_ppv",
-     "multiclass_positivepredictive_value", "multiclass_precision"]
-
-const W_KEY_MISMATCH = "Encountered target with levels different from the " *
-                       "keys of user-specified dictionary of class weights."
-const W_PROMOTE_WARN = "Using macro averaging instead of micro averaging, as "*
-    "class weights specified. "
-
-
-# ----------------------------------------------------
-# MulticlassTruePositive
-
-"""
-    MulticlassTruePositive(; return_type=LittleDict)
-
-$(docstring(MulticlassTruePositive()))
-
-    MulticlassTruePositive()(ŷ, y)
-
-Number of true positives for multiclass observations `ŷ` and ground
-truth `y`, using default return type. $DS_RET
-
-For more information, run `info(MulticlassTruePositive)`.
-
-"""
-function MulticlassTruePositive end
-const multiclass_true_positive  = MulticlassTruePositive()
-const multiclass_truepositive   = MulticlassTruePositive()
-const mtp = MulticlassTruePositive()
-
-
-# ----------------------------------------------------
-# MulticlassTrueNegative
-
-"""
-    MulticlassTrueNegative(; return_type=LittleDict)
-
-$(docstring(MulticlassTrueNegative()))
-
-    MulticlassTrueNegative()(ŷ, y)
-
-Number of true negatives for multiclass observations `ŷ` and ground truth
-`y`, using default return type. $DS_RET
-
-For more information, run `info(MulticlassTrueNegative)`.
-
-"""
-function MulticlassTrueNegative end
-const multiclass_true_negative  = MulticlassTrueNegative()
-const multiclass_truenegative   = MulticlassTrueNegative()
-const mtn = MulticlassTrueNegative()
-
-
-# ----------------------------------------------------
-# MulticlassFalsePositive
-
-"""
-    MulticlassFalsePositive(; return_type=LittleDict)
-
-$(docstring(MulticlassFalsePositive()))
-
-    MulticlassFalsePositive()(ŷ, y)
-
-Number of false positives for multiclass observations `ŷ` and ground
-truth `y`, using default return type. $DS_RET
-
-For more information, run `info(MulticlassFalsePositive)`.
-
-"""
-function MulticlassPositive end
-const multiclass_false_positive = MulticlassFalsePositive()
-const multiclass_falsepositive  = MulticlassFalsePositive()
-const mfp = MulticlassFalsePositive()
-
-
-# ----------------------------------------------------
-# MulticlassFalseNegative
-
-"""
-    MulticlassFalseNegative(; return_type=LittleDict)
-
-$(docstring(MulticlassFalseNegative()))
-
-    MulticlassFalseNegative()(ŷ, y)
-
-Number of false negatives for multiclass observations `ŷ` and ground
-truth `y`, using default return type. $DS_RET
-
-For more information, run `info(MulticlassFalseNegative)`.
-
-"""
-function MulticlassNegative end
-const multiclass_false_negative = MulticlassFalseNegative()
-const multiclass_falsenegative  = MulticlassFalseNegative()
-const mfn = MulticlassFalseNegative()
-
-
-# ----------------------------------------------------
-# MulticlassTruePositiveRate
-
-"""
-    MulticlassTruePositiveRate(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassTruePositiveRate()))
-
-    MulticlassTruePositiveRate(ŷ, y)
-    MulticlassTruePositiveRate(ŷ, y, class_w)
-
-True positive rate (a.k.a. sensitivity, recall, hit rate) for
-multiclass observations `ŷ` and ground truth `y`, using default
-averaging and return type. $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassTruePositiveRate)`.
-
-"""
-function MulticlassTruePositiveRate end
-const multiclass_true_positive_rate = MulticlassTruePositiveRate()
-const multiclass_truepositive_rate  = MulticlassTruePositiveRate()
-const multiclass_tpr                = MulticlassTruePositiveRate()
-const multiclass_sensitivity        = MulticlassTruePositiveRate()
-const multiclass_hit_rate           = MulticlassTruePositiveRate()
-const MTPR                          = MulticlassTruePositiveRate
-const multiclass_recall             = MulticlassTruePositiveRate()
-const MulticlassRecall              = MulticlassTruePositiveRate
-
-
-# ----------------------------------------------------
-# MulticlassTrueNegativeRate
-
-"""
-    MulticlassTrueNegativeRate(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassTrueNegativeRate()))
-
-    MulticlassTrueNegativeRate()(ŷ, y)
-    MulticlassTrueNegativeRate()(ŷ, y, class_w)
-
-True negative rate for multiclass observations `ŷ` and ground truth
-`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassTrueNegativeRate)`.
-
-"""
-function MulticlassTrueNegativeRate end
-const multiclass_true_negative_rate = MulticlassTrueNegativeRate()
-const multiclass_truenegative_rate  = MulticlassTrueNegativeRate()
-const multiclass_tnr                = MulticlassTrueNegativeRate()
-const multiclass_specificity        = MulticlassTrueNegativeRate()
-const multiclass_selectivity        = MulticlassTrueNegativeRate()
-const MulticlassSpecificity         = MulticlassTrueNegativeRate
-const MTNR                          = MulticlassTrueNegativeRate
-
-
-# ----------------------------------------------------
-# MulticlassFalsePositiveRate
-
-"""
-    MulticlassFalsePositiveRate(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassFalsePositiveRate()))
-
-    MulticlassFalsePositiveRate()(ŷ, y)
-    MulticlassFalsePositiveRate()(ŷ, y, class_w)
-
-False positive rate for multiclass observations `ŷ` and ground truth
-`y`, using default averaging and return type.  $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassFalsePositiveRate)`.
-
-"""
-function MulticlassFalsePositiveRate end
-const multiclass_false_positive_rate = MulticlassFalsePositiveRate()
-const multiclass_falsepositive_rate  = MulticlassFalsePositiveRate()
-const multiclass_fpr                 = MulticlassFalsePositiveRate()
-const MFPR                           = MulticlassFalsePositiveRate
-const multiclass_fallout             = MFPR()
-
-
-# ----------------------------------------------------
-# MulticlassFalseNegativeRate
-
-"""
-    MulticlassFalseNegativeRate(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassFalseNegativeRate()))
-
-    MulticlassFalseNegativeRate()(ŷ, y)
-    MulticlassFalseNegativeRate()(ŷ, y, class_w)
-
-False negative rate for multiclass observations `ŷ` and ground truth
-`y`, using default averaging and return type.  $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassFalseNegativeRate)`.
-
-"""
-function MulticlassFalseNegativeRate end
-const multiclass_false_negative_rate = MulticlassFalseNegativeRate()
-const multiclass_falsenegative_rate  = MulticlassFalseNegativeRate()
-const multiclass_fnr                 = MulticlassFalseNegativeRate()
-const MFNR                           = MulticlassFalseNegativeRate
-const multiclass_miss_rate           = MFNR()
-
-
-# ----------------------------------------------------
-# MulticlassFalseDiscoveryRate
-
-"""
-    MulticlassFalseDiscoveryRate(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassFalseDiscoveryRate()))
-
-    MulticlassFalseDiscoveryRate()(ŷ, y)
-    MulticlassFalseDiscoveryRate()(ŷ, y, class_w)
-
-False discovery rate for multiclass observations `ŷ` and ground truth
-`y`, using default averaging and return type.  $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassFalseDiscoveryRate)`.
-
-"""
-function MulticlassFalseDiscoveryRate end
-const multiclass_false_discovery_rate = MulticlassFalseDiscoveryRate()
-const multiclass_falsediscovery_rate  = MulticlassFalseDiscoveryRate()
-const multiclass_fdr                  = MulticlassFalseDiscoveryRate()
-const MFDR                            = MulticlassFalseDiscoveryRate
-
-
-# ----------------------------------------------------
-# MulticlassPrecision
-
-"""
-    MulticlassPrecision(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassPrecision()))
-
-    MulticlassPrecision()(ŷ, y)
-    MulticlassPrecision()(ŷ, y, class_w)
-
-Precision for multiclass observations `ŷ` and ground truth `y`, using
-default averaging and return type. $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassPrecision)`.
-
-"""
-function MulticlassPrecision end
-const multiclass_precision                 = MulticlassPrecision()
-const multiclass_ppv                       = MulticlassPrecision()
-const multiclass_positive_predictive_value = MulticlassPrecision()
-const multiclass_positivepredictive_value  = MulticlassPrecision()
-const MPPV                                 = MulticlassPrecision
-
-
-# ----------------------------------------------------
-# MulticlassNegativePredictiveValue
-
-"""
-    MulticlassNegativePredictiveValue(; average=macro_avg, return_type=LittleDict)
-
-$(docstring(MulticlassNegativePredictiveValue()))
-
-    MulticlassNegativePredictiveValue()(ŷ, y)
-    MulticlassNegativePredictiveValue()(ŷ, y, class_w)
-
-Negative predictive value for multiclass observations `ŷ` and ground truth
-`y`, using default averaging and return type. $DS_AVG_RET $CLASS_W
-
-For more information, run `info(MulticlassNegativePredictiveValue)`.
-
-"""
-function MulticlassNegativePredictiveValue end
-const multiclass_npv                       = MulticlassNegativePredictiveValue()
-const multiclass_negative_predictive_value = MulticlassNegativePredictiveValue()
-const multiclass_negativepredictive_value  = MulticlassNegativePredictiveValue()
-const MNPV                                 = MulticlassNegativePredictiveValue
-
-
-# -----------------------------------------------------
-## INTERNAL FUNCTIONS ON MULTICLASS CONFUSION MATRIX
-
-_mtp(m::CM, return_type::Type{Vector}) = diag(m.mat)
-_mtp(m::CM, return_type::Type{LittleDict}) =
-    LittleDict(m.labels, diag(m.mat))
-
-_mfp(m::CM, return_type::Type{Vector}) =
-    (col_sum = vec(sum(m.mat, dims=2)); col_sum .-= diag(m.mat))
-
-_mfp(m::CM, return_type::Type{LittleDict}) =
-    (col_sum = vec(sum(m.mat, dims=2)); col_sum .-= diag(m.mat);
-     LittleDict(m.labels, col_sum))
-
-_mfn(m::CM, return_type::Type{Vector}) =
-    (row_sum = vec(collect(transpose(sum(m.mat, dims=1))));
-     row_sum .-= diag(m.mat))
-
-_mfn(m::CM, return_type::Type{LittleDict}) =
-    (row_sum = vec(collect(transpose(sum(m.mat, dims=1))));
-     row_sum .-= diag(m.mat); LittleDict(m.labels, row_sum))
-
-function _mtn(m::CM, return_type::Type{Vector})
-    _sum = sum(m.mat, dims=2)
-    _sum .= sum(m.mat) .- (_sum .+= sum(m.mat, dims=1)'.- diag(m.mat))
-    return vec(_sum)
-end
-
-function _mtn(m::CM, return_type::Type{LittleDict})
-    _sum = sum(m.mat, dims=2)
-    _sum .= sum(m.mat) .- (_sum .+= sum(m.mat, dims=1)'.- diag(m.mat))
-    return LittleDict(m.labels, vec(_sum))
-end
-
-@inline _mean(x::Arr{<:Real}) = mean(skipnan(x)) # defined in src/data/data.jl
-
-@inline function _class_w(level_m::Arr{<:String},
-                          class_w::AbstractDict{<:Any, <:Real})
-    class_w_labels = levels(keys(class_w))
-    string.(class_w_labels) == level_m || throw(ArgumentError(W_KEY_MISMATCH))
-    return [class_w[l] for l in class_w_labels]
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            average::NoAvg, return_type::Type{Vector})
-    return vec(a ./ (a + b))
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            average::NoAvg, return_type::Type{LittleDict})
-    return LittleDict(m.labels, _mc_helper(m, a, b, average, Vector))
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            average::MacroAvg, return_type)
-    return _mean(_mc_helper(m, a, b, no_avg, Vector))
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            average::MicroAvg, return_type)
-    a_sum, b_sum = sum(a), sum(b)
-    return a_sum / (a_sum + b_sum)
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            class_w::AbstractDict{<:Any, <:Real},
-                            average::NoAvg, return_type::Type{Vector})
-    level_w = _class_w(m.labels, class_w)
-    return _mc_helper(m, a, b, no_avg, return_type) .* level_w
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            class_w::AbstractDict{<:Any, <:Real},
-                            average::MacroAvg, return_type::Type{Vector})
-    return _mean(_mc_helper(m, a, b, class_w, no_avg, return_type))
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            class_w::AbstractDict{<:Any, <:Real},
-                            average::MicroAvg, return_type)
-    @warn W_PROMOTE_WARN
-    return _mc_helper(m, a, b, class_w, macro_avg, Vector)
-end
-
-@inline function _mc_helper_b(m::CM, helper_name,
-                              class_w::AbstractDict{<:Any, <:Real},
-                              average::NoAvg, return_type::Type{Vector})
-    level_w = _class_w(m.labels, class_w)
-    return (1 .- helper_name(m, no_avg, return_type)) .* level_w
-end
-
-@inline function _mc_helper_b(m::CM, helper_name,
-                              class_w::AbstractDict{<:Any, <:Real},
-                              average::NoAvg, return_type::Type{LittleDict})
-    level_w = _class_w(m.labels, class_w)
-    return LittleDict(m.labels, ((1 .- helper_name(m, no_avg, Vector)) .* level_w))
-end
-
-@inline function _mc_helper_b(m::CM, helper_name,
-                              class_w::AbstractDict{<:Any, <:Real},
-                              average::MacroAvg, return_type)
-    return _mean(_mc_helper_b(m, helper_name, class_w, no_avg, Vector))
-end
-
-@inline function _mc_helper_b(m::CM, helper_name,
-                              class_w::AbstractDict{<:Any, <:Real},
-                              average::MicroAvg, return_type)
-    @warn W_PROMOTE_WARN
-    return _mc_helper_b(m, helper_name, class_w, macro_avg, Vector)
-end
-
-@inline function _mc_helper_b(m::CM, helper_name, average::NoAvg,
-                              return_type::Type{LittleDict})
-    return LittleDict(m.labels, 1.0 .- helper_name(m, average, Vector))
-end
-
-@inline function _mc_helper_b(m::CM, helper_name, average::NoAvg,
-                              return_type::Type{Vector})
-    return 1.0 .- helper_name(m, average, Vector)
-end
-
-@inline function _mc_helper_b(m::CM, helper_name, average::MacroAvg,
-                              return_type)
-    return 1.0 .- helper_name(m, average, Vector)
-end
-
-@inline function _mc_helper_b(m::CM, helper_name, average::MicroAvg,
-                              return_type)
-    return 1.0 .- helper_name(m, average, Vector)
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            class_w::AbstractDict{<:Any, <:Real},
-                            average::NoAvg, return_type::Type{LittleDict})
-    level_w = _class_w(m.labels, class_w)
-    return LittleDict(m.labels, _mc_helper(m, a, b, class_w, no_avg, Vector))
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            class_w::AbstractDict{<:Any, <:Real},
-                            average::MacroAvg, return_type::Type{U}) where U
-    return _mean(_mc_helper(m, a, b, class_w, no_avg, Vector))
-end
-
-@inline function _mc_helper(m::CM, a::Arr{<:Real}, b::Arr{<:Real},
-                            class_w::AbstractDict{<:Any, <:Real},
-                            average::MicroAvg, return_type::Type{U}) where U
-    @warn W_PROMOTE_WARN
-    return _mc_helper(m, a, b, class_w, macro_avg, return_type)
-end
-
-function _mtpr(m::CM, average::A, return_type::Type{U}) where {A, U}
-    mtp_val, mfn_val = _mtp_vec(m), _mfn_vec(m)
-    return _mc_helper(m, mtp_val, mfn_val, average, return_type)
-end
-
-function _mtpr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A,
-               return_type::Type{U}) where {A, U}
-    mtp_val, mfn_val = _mtp_vec(m), _mfn_vec(m)
-    return _mc_helper(m, mtp_val, mfn_val, class_w, average, return_type)
-end
-
-function _mtnr(m::CM, average::A, return_type::Type{U}) where {A, U}
-    mtn_val, mfp_val = _mtn_vec(m), _mfp_vec(m)
-    return _mc_helper(m, mtn_val, mfp_val, average, return_type)
-end
-
-function _mtnr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A,
-               return_type::Type{U}) where {A, U}
-    mtn_val, mfp_val = _mtn_vec(m), _mfp_vec(m)
-    return _mc_helper(m, mtn_val, mfp_val, class_w, average, return_type)
-end
-
-_mfpr(m::CM, average::A, return_type::Type{U}) where {A, U} =
-    _mc_helper_b(m, _mtnr, average, return_type)
-
-function _mfpr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A,
-               return_type::Type{U}) where {A, U}
-    return _mc_helper_b(m, _mtnr, class_w, average, return_type)
-end
-
-_mfnr(m::CM, average::A, return_type::Type{U}) where {A, U} =
-    _mc_helper_b(m, _mtpr, average, return_type)
-
-function _mfnr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A,
-               return_type::Type{U}) where {A, U}
-    return _mc_helper_b(m, _mtpr, class_w, average, return_type)
-end
-
-function _mfdr(m::CM, average::A, return_type::Type{U}) where {A, U}
-    mfp_val, mtp_val = _mfp_vec(m), _mtp_vec(m)
-    return _mc_helper(m, mfp_val, mtp_val, average, return_type)
-end
-
-function _mfdr(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A,
-               return_type::Type{U}) where {A, U}
-    mfp_val, mtp_val = _mfp_vec(m), _mtp_vec(m)
-    return _mc_helper(m, mfp_val, mtp_val, class_w, average, return_type)
-end
-
-function _mnpv(m::CM, average::A, return_type::Type{U}) where {A, U}
-    mtn_val, mfn_val = _mtn_vec(m), _mfn_vec(m)
-    return _mc_helper(m, mtn_val, mfn_val, average, return_type)
-end
-
-function _mnpv(m::CM, class_w::AbstractDict{<:Any, <:Real}, average::A,
-               return_type::Type{U}) where {A, U}
-    mtn_val, mfn_val = _mtn_vec(m), _mfn_vec(m)
-    return _mc_helper(m, mtn_val, mfn_val, class_w, average, return_type)
-end
-
-## CALLABLES ON MULTICLASS CONFUSION MATRIX
-
-(p::MulticlassTruePositive)(m::CM)  = _mtp(m, p.return_type)
-(n::MulticlassTrueNegative)(m::CM)  = _mtn(m, n.return_type)
-(p::MulticlassFalsePositive)(m::CM) = _mfp(m, p.return_type)
-(n::MulticlassFalseNegative)(m::CM) = _mfn(m, n.return_type)
-
-(r::MTPR)(m::CM) = _mtpr(m, r.average, r.return_type)
-(r::MTPR)(m::CM, w::AbstractDict{<:Any, <:Real}) =
-    _mtpr(m, w, r.average, r.return_type)
-
-(r::MTNR)(m::CM) = _mtnr(m, r.average, r.return_type)
-(r::MTNR)(m::CM, w::AbstractDict{<:Any, <:Real}) =
-    _mtnr(m, w, r.average, r.return_type)
-
-(r::MFPR)(m::CM) = _mfpr(m, r.average, r.return_type)
-(r::MFPR)(m::CM, w::AbstractDict{<:Any, <:Real}) =
-    _mfpr(m, w, r.average, r.return_type)
-
-(r::MFNR)(m::CM) = _mfnr(m, r.average, r.return_type)
-(r::MFNR)(m::CM, w::AbstractDict{<:Any, <:Real}) =
-    _mfnr(m, w, r.average, r.return_type)
-
-(r::MFDR)(m::CM) = _mfdr(m, r.average, r.return_type)
-(r::MFDR)(m::CM, w::AbstractDict{<:Any, <:Real}) =
-    _mfdr(m, w, r.average, r.return_type)
-
-(v::MNPV)(m::CM) = _mnpv(m, v.average, v.return_type)
-(v::MNPV)(m::CM, w::AbstractDict{<:Any, <:Real}) =
-    _mnpv(m, w, v.average, v.return_type)
-
-(p::MulticlassPrecision)(m::CM) =
-    _mc_helper_b(m, _mfdr, p.average, p.return_type)
-(p::MulticlassPrecision)(m::CM, class_w::AbstractDict{<:Any, <:Real}) =
-    _mc_helper_b(m, _mfdr, class_w, p.average, p.return_type)
-
-@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real},
-                    average::NoAvg, return_type::Type{LittleDict})
-    β2 = β^2
-    return LittleDict(m.labels, (1 + β2) * mtp_val ./ ((1 + β2) * mtp_val + β2 * mfn_val + mfp_val))
-end
-
-@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real},
-                    average::NoAvg, return_type::Type{Vector})
-    β2 = β^2
-    return (1 + β2) * mtp_val ./ ((1 + β2) * mtp_val + β2 * mfn_val + mfp_val)
-end
-
-@inline function _fs_helper(m::CM, β::Real, mtp_val::Arr{<:Real}, mfp_val::Arr{<:Real}, mfn_val::Arr{<:Real},
-                            average::MacroAvg, return_type::Type{U}) where U
-    return _mean(_fs_helper(m, β, mtp_val, mfp_val, mfn_val, no_avg, Vector))
-end
-
-function (f::MulticlassFScore)(m::CM)
-    f.average == micro_avg && return MulticlassRecall(; average=micro_avg, return_type=f.return_type)(m)
-    mtp_val = _mtp(m, Vector)
-    mfp_val = _mfp(m, Vector)
-    mfn_val = _mfn(m, Vector)
-    return _fs_helper(m, f.β, mtp_val, mfp_val, mfn_val, f.average, f.return_type)
-end
-
-@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real,
-                    average::NoAvg, return_type::Type{LittleDict})
-    level_w = _class_w(m.labels, w)
-    return LittleDict(m.labels,
-                      MulticlassFScore(β=β,
-                                       average=no_avg,
-                                       return_type=Vector)(m) .* level_w)
-end
-
-@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real,
-                    average::NoAvg, return_type::Type{Vector})
-    level_w = _class_w(m.labels, w)
-    return MulticlassFScore(β=β,
-                            average=no_avg,
-                            return_type=Vector)(m) .* level_w
-end
-
-@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real,
-                            average::MacroAvg, return_type::Type{U}) where U
-    return _mean(_fs_helper(m, w, β, no_avg, Vector))
-end
-
-@inline function _fs_helper(m::CM, w::AbstractDict{<:Any, <:Real}, β::Real,
-                            average::MicroAvg, return_type::Type{U}) where U
-    @warn W_PROMOTE_WARN
-    return _fs_helper(m, w, β, macro_avg, return_type)
-end
-
-function (f::MulticlassFScore)(m::CM, class_w::AbstractDict{<:Any, <:Real})
-    return _fs_helper(m, class_w, f.β, f.average, f.return_type)
-end
-
-## Callables on arrays
-
-for M_ex in (:MulticlassTruePositive, :MulticlassTrueNegative,
-          :MulticlassFalsePositive, :MulticlassFalseNegative)
-    @eval call(m::$M_ex, ŷ, y) = m(_confmat(ŷ, y, warn=false))
-end
-
-for M_ex in (:MTPR, :MTNR, :MFPR, :MFNR, :MFDR, :MulticlassPrecision, :MNPV,
-          :MulticlassFScore)
-    @eval call(m::$M_ex, ŷ, y) = m(_confmat(ŷ, y, warn=false))
-    @eval call(m::$M_ex, ŷ, y, class_w::AbstractDict{<:Any, <:Real}) =
-        m(_confmat(ŷ, y, warn=false), class_w)
-end
diff --git a/src/measures/loss_functions_interface.jl b/src/measures/loss_functions_interface.jl
deleted file mode 100644
index 5d7d6125..00000000
--- a/src/measures/loss_functions_interface.jl
+++ /dev/null
@@ -1,208 +0,0 @@
-# implementation of MLJ measure interface for LossFunctions.jl
-
-function naked(T::Type)
-    without_module_name = split(string(T), '.') |> last
-    without_type_parameters = split(without_module_name, '{') |> first
-    return Symbol(without_type_parameters)
-end
-
-const WITHOUT_PARAMETERS =
-    setdiff(LOSS_FUNCTIONS, WITH_PARAMETERS)
-
-## WRAPPER
-
-abstract type SupervisedLoss <: Unaggregated end
-
-
-struct MarginLoss{L<:LossFunctions.MarginLoss} <: SupervisedLoss
-    loss::L
-end
-
-struct DistanceLoss{L<:LossFunctions.DistanceLoss} <: SupervisedLoss
-    loss::L
-end
-
-# INTERFACE FOR EXTRACTING PARAMETERS
-
-# LossFunctions.jl does not have a uniform interface for extacting
-# parameters, and hence:
-
-_parameter(loss::LossFunctions.DWDMarginLoss) = loss.q
-_parameter(loss::LossFunctions.SmoothedL1HingeLoss) = loss.gamma
-_parameter(loss::LossFunctions.HuberLoss) = loss.d
-_parameter(loss::LossFunctions.L1EpsilonInsLoss) = loss.ε
-_parameter(loss::LossFunctions.L2EpsilonInsLoss) = loss.ε
-_parameter(::LossFunctions.LPDistLoss{P}) where P = P
-_parameter(::LossFunctions.L1DistLoss) = 1
-_parameter(::LossFunctions.L2DistLoss) = 2
-_parameter(loss::LossFunctions.QuantileLoss) = loss.τ
-
-
-## CONSTRUCTORS AND CALLING BEHAVIOUR
-
-err_wrap(n) = ArgumentError("Bad @wrap syntax: $n. ")
-
-# We define amacro to wrap a concrete `LossFunctions.SupervisedLoss`
-# type and define its constructor, and to define property access in
-# case of parameters; the macro also defines calling behaviour:
-macro wrap_loss(ex)
-    ex.head == :call || throw(err_wrap(1))
-    Loss_ex = ex.args[1]
-    Loss_str = string(Loss_ex)
-    if Loss_ex in MARGIN_LOSSES
-        T = :MarginLoss
-    else
-        T = :DistanceLoss
-    end
-
-    # bind name to wrapped version of LossFunctions loss:
-    program = quote
-        const $Loss_ex = $T{<:LossFunctions.$Loss_ex}
-        name(M::Type{<:$Loss_ex}) = $Loss_str
-    end
-
-    # defined instances
-    alias = snakecase(string(Loss_ex))
-    push!(program.args, quote
-          instances(::Type{<:$Loss_ex}) = [$alias, ]
-          end)
-
-    # define kw constructor and expose any parameter as a property:
-    if length(ex.args) == 1
-        push!(program.args, quote
-              $Loss_ex() = $T(LossFunctions.$Loss_ex())
-              Base.propertynames(::$Loss_ex) = ()
-              end)
-    elseif length(ex.args) > 1
-        sub_ex = ex.args[2]
-        sub_ex.head == :parameters || throw(err_wrap(2))
-        length(sub_ex.args) == 1 || throw(err_wrap("Only 1 kwarg supported"))
-        sub_ex.args[1].head == :kw || throw(err_wrap(3))
-        var_ex = sub_ex.args[1].args[1]
-        var_str = string(var_ex)
-        val_ex = sub_ex.args[1].args[2]
-        push!(program.args, quote
-              $Loss_ex(; $var_ex=$val_ex) =
-                  $T(LossFunctions.$Loss_ex($var_ex))
-              $Loss_ex(p) = $Loss_ex($var_ex=p)
-              Base.propertynames(::$Loss_ex) = (Symbol($var_str), )
-              function Base.getproperty(wrapper::$Loss_ex, name::Symbol)
-                  if name === Symbol($var_str)
-                      return _parameter(getfield(wrapper, :loss)) # see below
-                  end
-                  error("type $($Loss_ex) has no property $name")
-              end
-              end)
-    else
-        throw(err_wrap(4))
-    end
-
-    esc(program)
-end
-
-for Loss in WITHOUT_PARAMETERS
-    eval(:(@wrap_loss $Loss()))
-end
-
-@wrap_loss DWDMarginLoss(; q=1.0)
-@wrap_loss SmoothedL1HingeLoss(; gamma=1.0)
-@wrap_loss HuberLoss(; d=1.0)
-@wrap_loss L1EpsilonInsLoss(; ε=1.0)
-@wrap_loss L2EpsilonInsLoss(; ε=1.0)
-@wrap_loss LPDistLoss(; P=2)
-@wrap_loss QuantileLoss(; τ=0.7)
-
-
-## GENERIC TRAITS
-
-const LossFunctions = LossFunctions
-is_measure_type(::Type{<:SupervisedLoss})          = true
-orientation(::Type{<:SupervisedLoss})              = :loss
-reports_each_observation(::Type{<:SupervisedLoss}) = true
-is_feature_dependent(::Type{<:SupervisedLoss})     = false
-supports_weights(::Type{<:SupervisedLoss}) = true
-docstring(M::Type{<:SupervisedLoss})       = name(M)
-
-
-## CALLING - DISTANCE BASED LOSS FUNCTIONS
-
-MMI.prediction_type(::Type{<:DistanceLoss}) = :deterministic
-MMI.target_scitype(::Type{<:DistanceLoss}) = Union{Vec{Continuous},Vec{Count}}
-
-call(measure::DistanceLoss, yhat, y) =
-    (getfield(measure, :loss)).(yhat, y)
-
-function call(measure::DistanceLoss, yhat, y, w::AbstractArray)
-    return w .* call(measure, yhat, y)
-end
-
-
-## CALLING - MARGIN BASED LOSS FUNCTIONS
-
-MMI.prediction_type(::Type{<:MarginLoss}) = :probabilistic
-MMI.target_scitype(::Type{<:MarginLoss})  = AbstractArray{<:Finite{2}}
-
-# rescale [0, 1] -> [-1, 1]:
-_scale(p) = 2p - 1
-
-function call(measure::MarginLoss, yhat, y)
-    probs_of_observed = broadcast(pdf, yhat, y)
-    loss = getfield(measure, :loss)
-    return loss.(_scale.(probs_of_observed), 1)
-end
-
-call(measure::MarginLoss, yhat, y, w::AbstractArray) =
-    w .* call(measure, yhat, y)
-
-
-## ADJUSTMENTS
-
-human_name(::Type{<:L1EpsilonInsLoss}) = "l1 ϵ-insensitive loss"
-human_name(::Type{<:L2EpsilonInsLoss}) = "l2 ϵ-insensitive loss"
-human_name(::Type{<:DWDMarginLoss}) = "distance weighted discrimination loss"
-
-_signature(::Any) = ""
-_signature(::Type{<:HuberLoss}) = "`HuberLoss(; d=1.0)`"
-_signature(::Type{<:DWDMarginLoss}) = "`DWDMarginLoss(; q=1.0)`"
-_signature(::Type{<:SmoothedL1HingeLoss}) = "`SmoothedL1HingeLoss(; gamma=1.0)`"
-_signature(::Type{<:L1EpsilonInsLoss}) = "`L1EpsilonInsLoss(; ε=1.0)`"
-_signature(::Type{<:L2EpsilonInsLoss}) = "`L2EpsilonInsLoss(; ε=1.0)`"
-_signature(::Type{<:LPDistLoss}) = "`LPDistLoss(; P=2)`"
-_signature(::Type{<:QuantileLoss}) = "`QuantileLoss(; τ=0.7)`"
-
-
-## ALIASES AND DOCSTRINGS
-
-const DOC_LOSS_FUNCTIONS =
-"""
-For more detail, see the original LossFunctions.jl documentation *but
-note differences in the signature.*
-
-Losses from LossFunctions.jl do not support `missing` values. To use
-with `missing` values, replace `(ŷ, y)` with `skipinvalid(ŷ, y))`.
-"""
-
-for Loss_ex in DISTANCE_LOSSES
-    eval(quote
-         sig = _signature($Loss_ex)
-         isempty(sig) || (sig = "Constructor signature: "*sig)
-         @create_aliases $Loss_ex
-         @create_docs($Loss_ex,
-                      typename = name($Loss_ex),
-                      body=DOC_LOSS_FUNCTIONS,
-                      footer=sig)
-         end)
-end
-
-for Loss_ex in MARGIN_LOSSES
-    eval(quote
-         sig = _signature($Loss_ex)
-         isempty(sig) || (sig = "Constructor signature: "*sig)
-         @create_aliases $Loss_ex
-         @create_docs($Loss_ex,
-                      typename = name($Loss_ex),
-                      body=DOC_LOSS_FUNCTIONS,
-                      scitype=DOC_FINITE_BINARY,
-                      footer= sig)
-         end)
-end
diff --git a/src/measures/measure_search.jl b/src/measures/measure_search.jl
deleted file mode 100644
index bd813009..00000000
--- a/src/measures/measure_search.jl
+++ /dev/null
@@ -1,65 +0,0 @@
-const LOCAL_MEASURE_TYPES = filter(x->x != SupervisedLoss,
-                                   vcat(subtypes(MLJBase.Unaggregated),
-                                        subtypes(MLJBase.Aggregated)))
-
-const LOSS_FUNCTIONS_MEASURE_TYPES =
-    [eval(:($Loss)) for Loss in LOSS_FUNCTIONS]
-
-const MEASURE_TYPES = vcat(LOCAL_MEASURE_TYPES, LOSS_FUNCTIONS_MEASURE_TYPES)
-
-const MeasureProxy = NamedTuple{Tuple(MEASURE_TRAITS)}
-
-function Base.show(stream::IO, p::MeasureProxy)
-    instances = "["*join(p.instances, ", ")*"]"
-    print(stream, "(name = $(p.name), instances = $instances, ...)")
-end
-
-function Base.show(stream::IO, ::MIME"text/plain", p::MeasureProxy)
-    printstyled(IOContext(stream, :color=> MLJBase.SHOW_COLOR[]),
-                p.docstring, bold=false, color=:magenta)
-    println(stream)
-    MLJBase.fancy_nt(stream, p)
-end
-
-"""
-    measures()
-
-List all measures as named-tuples keyed on measure traits.
-
-    measures(filters...)
-
-List all measures compatible with the target `y`.
-
-    measures(needle::Union{AbstractString,Regex}
-
-List all measures with `needle` in a measure's `name`, `instances`, or
-`docstring`
-
-
-### Example
-
-Find all classification measures supporting sample weights:
-
-    measures(m -> m.target_scitype <: AbstractVector{<:Finite} &&
-                  m.supports_weights)
-
-Find all measures in the "rms" family:
-
-    measures("rms")
-
-"""
-function measures(conditions...)
-    all_measures = map(info, MEASURE_TYPES)
-    return filter(all_measures) do measure
-        all(c(measure) for c in conditions)
-    end
-end
-
-function measures(needle::Union{AbstractString,Regex})
-    f = m -> occursin(needle, m.name) ||
-        occursin(needle, m.docstring) ||
-        occursin(needle, join(m.instances, " "))
-    return MLJBase.measures(f)
-end
-
-measures() = measures(x->true)
diff --git a/src/measures/measures.jl b/src/measures/measures.jl
deleted file mode 100644
index 3c23a4f9..00000000
--- a/src/measures/measures.jl
+++ /dev/null
@@ -1,302 +0,0 @@
-const PROPER_SCORING_RULES = "[Gneiting and Raftery (2007), \"Strictly"*
-    "Proper Scoring Rules, Prediction, and Estimation\""*
-    "](https://doi.org/10.1198/016214506000001437)"
-const DOC_FINITE =
-    "`AbstractArray{<:Union{Finite,Missing}` (multiclass classification)"
-const DOC_FINITE_BINARY =
-    "`AbstractArray{<:Union{Finite{2},Missing}}` (binary classification)"
-const DOC_ORDERED_FACTOR =
-    "`AbstractArray{<:Union{OrderedFactor,Missing}}` (classification of ordered target)"
-const DOC_ORDERED_FACTOR_BINARY =
-    "`AbstractArray{<:Union{OrderedFactor{2},Missing}}` "*
-    "(binary classification where choice of \"true\" effects the measure)"
-const DOC_CONTINUOUS = "`AbstractArray{<:Union{Continuous,Missing}}` (regression)"
-const DOC_COUNT = "`AbstractArray{<:Union{Count,Missing}}`"
-const DOC_MULTI = "`AbtractArray{<:Union{Missing,T}` where `T` is `Continuous` "*
-    "or `Count` (for respectively continuous or discrete Distribution.jl objects in "*
-    "`ŷ`) or  `OrderedFactor` or `Multiclass` "*
-    "(for `UnivariateFinite` distributions in `ŷ`)"
-
-const DOC_INFINITE = "`AbstractArray{<:Union{Infinite,Missing}}`"
-const INVARIANT_LABEL =
-    "This metric is invariant to class reordering."
-const VARIANT_LABEL =
-    "This metric is *not* invariant to class re-ordering"
-
-is_measure_type(::Any) = false
-
-# Each of the following traits, with fallbacks defined in
-# StatisticalTraits.jl, make sense for some or all measures:
-
-const MEASURE_TRAITS = [
-    :name,
-    :instances,
-    :human_name,
-    :target_scitype,
-    :supports_weights,
-    :supports_class_weights,
-    :prediction_type,
-    :orientation,
-    :reports_each_observation,
-    :aggregation,
-    :is_feature_dependent,
-    :docstring,
-    :distribution_type
-]
-
-# # FOR BUILT-IN MEASURES (subtyping Measure)
-
-abstract type Measure <: MLJType end
-abstract type Aggregated <: Measure end
-abstract type Unaggregated <: Measure end
-
-StatisticalTraits.reports_each_observation(::Type{<:Aggregated}) = false
-StatisticalTraits.reports_each_observation(::Type{<:Unaggregated}) = true
-
-
-# # FALLBACK CHECKS
-extra_check(::Measure, args...) = nothing
-function _check(measure::Measure, yhat, y)
-    check_dimensions(yhat, y)
-    extra_check(measure, yhat, y)
-end
-function _check(measure::Measure, yhat, y, w)
-    check_dimensions(yhat, y)
-    extra_check(measure, yhat, y, w)
-end
-function _check(measure::Measure, yhat, y, w::Arr)
-    check_dimensions(yhat, y)
-    check_dimensions(y, w)
-    extra_check(measure, yhat, y, w)
-end
-function _check(measure::Measure, yhat::Arr{<:UnivariateFinite})
-    check_dimensions(yhat, y)
-    check_pools(yhat, y)
-    extra_check(measure, yhat, y)
-end
-
-function _check(
-    measure::Measure,
-    yhat::Arr{<:UnivariateFinite},
-    y,
-    w::Arr
-)
-    check_dimensions(yhat, y)
-    check_pools(yhat, y)
-    extra_check(measure, yhat, y, w)
-end
-
-function _check(
-    measure::Measure,
-    yhat::Arr{<:UnivariateFinite},
-    y,
-    w::AbstractDict
-)
-    check_dimensions(yhat, y)
-    check_pools(yhat, y)
-    check_pools(yhat, w)
-    extra_check(measure, yhat, y, w)
-end
-
-# # METHODS TO EVALUATE MEASURES
-
-# See measures/README.md for details
-
-# `robust_single` can accept `missing` observations/predictions but is never overloaded;
-# `single` is overloaded but does not need to handle missings. This factoring allows us
-# to avoid method ambiguities which are cumbersome to avoid with only one function.
-
-robust_single(args...) = single(args...)
-robust_single(m, ::Missing, ::Missing) = missing
-robust_single(m, ::Missing, η) = missing
-robust_single(m, η̂, ::Missing) = missing
-
-const Label = Union{CategoricalValue, Number, AbstractString, Symbol, AbstractChar}
-
-# closure for broadcasting:
-robust_single(measure::Measure) = (ηhat, η) -> robust_single(measure, ηhat, η)
-
-call(measure::Unaggregated, yhat, y) = broadcast(robust_single(measure), yhat, y)
-function call(measure::Unaggregated, yhat, y, w::AbstractArray)
-    unweighted = broadcast(robust_single(measure), yhat, y)
-    return w .* unweighted
-end
-function call(measure::Unaggregated, yhat, y, weight_given_class::AbstractDict)
-    unweighted = broadcast(robust_single(measure), yhat, y)
-    w = @inbounds broadcast(η -> weight_given_class[η], y)
-    return w .* unweighted
-end
-
-# ## Top level
-function (measure::Measure)(args...)
-    _check(measure, args...)
-    call(measure, args...)
-end
-
-# # TRAITS
-
-# user-bespoke measures will subtype `Measure` directly and the
-# following will therefore not apply:
-StatisticalTraits.supports_weights(::Type{<:Union{Aggregated, Unaggregated}}) = true
-
-is_measure_type(::Type{<:Measure}) = true
-is_measure(m) = is_measure_type(typeof(m))
-
-# docstring fall-back:
-_decorate(s::AbstractString) = "`$s`"
-_decorate(v::Vector{<:AbstractString}) = join(_decorate.(v), ", ")
-function MMI.docstring(M::Type{<:Measure})
-    list = _decorate(instances(M))
-    ret = "`$(name(M))` - $(human_name(M)) type"
-    isempty(list) || (ret *= " with instances $list")
-    ret *= ". "
-    return ret
-end
-
-# display:
-show_as_constructed(::Type{<:Measure}) = true
-
-# info
-function StatisticalTraits.info(M::Type{<:Measure})
-    values = Tuple(@eval($trait($M)) for trait in MEASURE_TRAITS)
-    return NamedTuple{Tuple(MEASURE_TRAITS)}(values)
-end
-
-StatisticalTraits.info(m::Measure) = StatisticalTraits.info(typeof(m))
-
-
-# # AGGREGATION
-
-(::Sum)(v) = sum(skipinvalid(v))
-(::Sum)(v::LittleDict) = sum(values(v))
-
-(::Mean)(v) = mean(skipinvalid(v))
-(::Mean)(v::LittleDict) = mean(values(v))
-
-(::RootMeanSquare)(v) = sqrt(mean(skipinvalid(v).^2))
-
-aggregate(v, measure) = aggregation(measure)(v)
-
-# aggregation is no-op on scalars:
-const MeasureValue = Union{Real,Tuple{<:Real,<:Real}} # number or interval
-aggregate(x::MeasureValue, measure) = x
-
-
-# # UNIVERSAL CALLING SYNTAX
-
-# yhat - predictions (point or probabilisitic)
-# X - features
-# y - target observations
-# w - per-observation weights
-
-function value(measure, yhat, X, y, w)
-    vfdep     = Val(is_feature_dependent(measure))
-    vsweights = Val(supports_weights(measure) ||
-                    supports_class_weights(measure))
-    return value(measure, yhat, X, y, w, vfdep, vsweights)
-end
-
-# # UNIVERSAL CALLING INTERFACE
-
-#  is feature independent, weights not supported:
-value(m, yhat, X, y, w, ::Val{false}, ::Val{false}) = m(yhat, y)
-
-#  is feature dependent:, weights not supported:
-value(m, yhat, X, y, w, ::Val{true}, ::Val{false}) = m(yhat, X, y)
-
-#  is feature independent, weights supported:
-value(m, yhat, X, y, w,         ::Val{false}, ::Val{true}) = m(yhat, y, w)
-value(m, yhat, X, y, ::Nothing, ::Val{false}, ::Val{true}) = m(yhat, y)
-
-#  is feature dependent, weights supported:
-value(m, yhat, X, y, w,         ::Val{true}, ::Val{true}) = m(yhat, X, y, w)
-value(m, yhat, X, y, ::Nothing, ::Val{true}, ::Val{true}) = m(yhat, X, y)
-
-# # helpers
-
-_scale(x, w::Arr, i) = x*w[i]
-_scale(x, ::Nothing, i::Any) = x
-
-function check_pools(ŷ, y)
-    levels(y) == levels(ŷ[1]) ||
-        error("Conflicting categorical pools found "*
-              "in observations and predictions. ")
-    return nothing
-end
-
-function check_pools(ŷ, w::AbstractDict)
-    Set(levels(ŷ[1])) == Set(keys(w)) ||
-        error("Conflicting categorical pools found "*
-              "in class weights and predictions. ")
-    return nothing
-end
-
-# # INCLUDE SPECIFIC MEASURES AND TOOLS
-
-include("meta_utilities.jl")
-include("roc.jl")
-include("confusion_matrix.jl")
-include("continuous.jl")
-include("finite.jl")
-include("probabilistic.jl")
-include("loss_functions_interface.jl")
-
-
-# # DEFAULT MEASURES
-
-default_measure(T, S) = _default_measure(T, nonmissingtype(S))
-
-_default_measure(T, S) = nothing
-
-# Deterministic + Continuous / Count ==> RMS
-function _default_measure(
-    ::Type{<:Deterministic},
-    ::Type{<:Union{Vec{<:Continuous}, Vec{<:Count}}},
-)
-   return rms
-end
-
-# Deterministic + Finite ==> Misclassification rate
-function _default_measure(
-    ::Type{<:Deterministic},
-    ::Type{<:Vec{<:Finite}},
-)
-    return misclassification_rate
-end
-
-# Probabilistic + Finite / Count ==> log loss
-function _default_measure(
-    ::Type{<:Probabilistic},
-    ::Type{<:Union{Vec{<:Finite},Vec{<:Count}}},
-)
-    return log_loss
-end
-
-# Probabilistic + Continuous ==> Log loss
-function _default_measure(
-    ::Type{<:Probabilistic},
-    ::Type{<:Vec{<:Continuous}},
-)
-    return log_loss
-end
-
-function _default_measure(
-    ::Type{<:MMI.ProbabilisticDetector},
-    ::Type{<:Vec{<:OrderedFactor{2}}},
-)
-    return area_under_curve
-end
-
-function _default_measure(
-    ::Type{<:MMI.DeterministicDetector},
-    ::Type{<:Vec{<:OrderedFactor{2}}},
-)
-    return balanced_accuracy
-end
-
-# Fallbacks
-default_measure(M::Type{<:Supervised}) = default_measure(M, target_scitype(M))
-default_measure(::M) where M <: Supervised = default_measure(M)
-
-default_measure(M::Type{<:Annotator}) = _default_measure(M, target_scitype(M))
-default_measure(::M) where M <: Annotator = default_measure(M)
diff --git a/src/measures/meta_utilities.jl b/src/measures/meta_utilities.jl
deleted file mode 100644
index 3b0de197..00000000
--- a/src/measures/meta_utilities.jl
+++ /dev/null
@@ -1,233 +0,0 @@
-const DOC_OBSERVATIONS =
-    "on predictions `ŷ`, "*
-    "given ground truth observations `y`. "
-const DOC_WEIGHTS =
-    "Optionally specify per-sample weights, `w`. "
-const DOC_CLASS_WEIGHTS =
-    "An optional `AbstractDict`, denoted `class_w` above, "*
-    "keyed on `levels(y)`, specifies class weights. "
-
-macro create_aliases(M_ex)
-    esc(quote
-        M = $M_ex
-        for alias in Symbol.(instances(M))
-        # isdefined(parentmodule(M), alias) || eval(:(const $alias = $M()))
-        eval(:(const $alias = $M()))
-        end
-        end)
-end
-
-function detailed_doc_string(M; typename="", body="", footer="", scitype="")
-
-    _instances = _decorate(instances(M))
-    human_name = MLJBase.human_name(M)
-    if isempty(scitype)
-        scitype = "`$(target_scitype(M))`"
-    end
-
-    if isempty(typename)
-        ret = "    $M\n\n"
-    else
-        ret = "    MLJBase.$typename\n\n"
-    end
-
-    ret *= "A measure type for $(human_name)"
-    isempty(_instances) ||
-        (ret  *= ", which includes the instance(s): "*
-         "$_instances")
-    ret *= ".\n\n"
-    ret *= "    $(name(M))()(ŷ, y)\n"
-    supports_weights(M) &&
-        (ret *= "    $(name(M))()(ŷ, y, w)\n")
-    supports_class_weights(M) &&
-        (ret *= "    $(name(M))()(ŷ, y, class_w)\n")
-    ret *= "\n"
-    if isempty(fieldnames(M))
-            ret *= "Evaluate the $(human_name) "
-    else
-        ret *= "Evaluate the default instance of $(name(M)) "
-    end
-    ret *= "$DOC_OBSERVATIONS"
-    supports_weights(M) &&
-        (ret *= DOC_WEIGHTS)
-    supports_class_weights(M) &&
-        (ret *= DOC_CLASS_WEIGHTS)
-    ret *= "\n\n"
-    isempty(body) || (ret *= "$body\n\n")
-    ret *= "Requires `scitype(y)` to be a subtype of $scitype; "
-    ret *= "`ŷ` must be an array of `$(prediction_type(M))` predictions. "
-    isempty(footer) ||(ret *= "\n\n$footer")
-    ret *= "\n\n"
-    ret *= "For more information, run `info($(name(M)))`. "
-    return ret
-end
-
-
-_err_create_docs() = error(
-    "@create_docs syntax error. Usage: \n"*
-    "@create_docs(MeasureType, typename=..., body=..., scitype=..., footer=...")
-macro create_docs(M_ex, exs...)
-    M_ex isa Symbol || _err_create_docs()
-    t = ""
-    b = ""
-    s = ""
-    f = ""
-    for ex in exs
-        ex.head == :(=) || _err_create_docs()
-        ex.args[1] == :typename && (t = ex.args[2])
-        ex.args[1] == :body &&     (b = ex.args[2])
-        ex.args[1] == :scitype &&  (s = ex.args[2])
-        ex.args[1] == :footer &&   (f = ex.args[2])
-    end
-    esc(quote
-        "$(detailed_doc_string($M_ex, typename=$t, body=$b, scitype=$s, footer=$f))"
-        function $M_ex end
-        end)
-end
-
-# TODO: I wonder why this is not a macro?
-
-"""
-    metadata_measure(T; kw...)
-
-Helper function to write the metadata (trait definitions) for a single
-measure.
-
-### Compulsory keyword arguments
-
-- `target_scitype`: The allowed scientific type of `y` in `measure(ŷ,
-  y, ...)`. This is typically some abstract array. E.g, in single
-  target variable regression this is typically
-  `AbstractArray{<:Union{Missing,Continuous}}`. For a binary
-  classification metric insensitive to class order, this would
-  typically be `Union{AbstractArray{<:Union{Missing,Multiclass{2}}},
-  AbstractArray{<:Union{Missing,OrderedFactor{2}}}}`, which has the
-  alias `FiniteArrMissing`.
-
-- `orientation`: Orientation of the measure.  Use `:loss` when lower is
-    better and `:score` when higher is better.  For example, set
-    `:loss` for root mean square and `:score` for area under the ROC
-    curve.
-
-- `prediction_type`: Refers to `ŷ` in `measure(ŷ, y, ...)` and should
-  be one of: `:deterministic` (`ŷ` has same type as `y`),
-  `:probabilistic` or `:interval`.
-
-
-#### Optional keyword arguments
-
-The following have meaningful defaults but may still require
-overloading:
-
-- `instances`: A vector of strings naming the built-in instances of
-  the measurement type provided by the implementation, which are
-  usually just common aliases for the default instance. E.g., for
-  `RSquared` has the `instances = ["rsq", "rsquared"]` which are both
-  defined as `RSquared()` in the implementation. `MulticlassFScore`
-  has the `instances = ["macro_f1score", "micro_f1score",
-  "multiclass_f1score"]`, where `micro_f1score =
-  MulticlassFScore(average=micro_avg)`, etc.  Default is `String[]`.
-
-- `aggregation`: Aggregation method for measurements, typically
-        `Mean()` (for, e.g., mean absolute error) or `Sum()` (for number
-    of true positives). Default is `Mean()`. Must subtype
-    `StatisticalTraits.AggregationMode`. It is used to:
-
-   - aggregate measurements in resampling (e.g., cross-validation)
-
-   - aggregating per-observation measurements returned by `single` in
-     the fallback definition of `call` for `Unaggregated` measures
-    (such as area under the ROC curve).
-
-- `supports_weights`: Whether the measure can be called with
-  per-observation weights `w`, as in `l2(ŷ, y, w)`. Default is `true`.
-
-- `supports_class_weights`: Whether the measure can be called with a
-  class weight dictionary `w`, as in `micro_f1score(ŷ, y, w)`. Default
-  is `true`. Default is `false`.
-
-- `human_name`: Ordinary name of measure. Used in the full
-  auto-generated docstring, which begins "A measure type for
-  \$human_name ...". Eg, the `human_name` for `TruePositive` is `number
-  of true positives. Default is snake-case version of type name, with
-  underscores replaced by spaces; so `MeanAbsoluteError` becomes "mean
-  absolute error".
-
-- `docstring`: An abbreviated docstring, displayed by
-  `info(measure)`. Fallback uses `human_name` and lists the
-  `instances`.
-
-"""
-function metadata_measure(T; name::String="",
-                          human_name="",
-                          instances::Vector{String}=String[],
-                          target_scitype=Unknown,
-                          prediction_type::Symbol=:unknown,
-                          orientation::Symbol=:unknown,
-                          aggregation=Mean(),
-                          is_feature_dependent::Bool=false,
-                          supports_weights::Bool=true,
-                          supports_class_weights::Bool=false,
-                          docstring::String="",
-                          distribution_type=Unknown)
-    pred_str        = "$prediction_type"
-    orientation_str = "$orientation"
-#    dist = ifelse(ismissing(distribution_type), missing, "$distribution_type")
-    ex = quote
-
-        # traits common with models:
-        if !isempty($name)
-            StatisticalTraits.name(::Type{<:$T}) = $name
-        end
-        if !isempty($docstring)
-            StatisticalTraits.docstring(::Type{<:$T}) = $docstring
-        end
-        StatisticalTraits.target_scitype(::Type{<:$T}) = $target_scitype
-        StatisticalTraits.prediction_type(::Type{<:$T}) = Symbol($pred_str)
-        StatisticalTraits.supports_weights(::Type{<:$T}) = $supports_weights
-
-        # traits specific to measures:
-        if !isempty($instances)
-            StatisticalTraits.instances(::Type{<:$T}) = $instances
-        end
-        if !isempty($human_name)
-            StatisticalTraits.human_name(::Type{<:$T}) = $human_name
-        end
-        StatisticalTraits.orientation(::Type{<:$T}) = Symbol($orientation_str)
-        StatisticalTraits.aggregation(::Type{<:$T}) = $aggregation
-        StatisticalTraits.is_feature_dependent(::Type{<:$T}) =
-            $is_feature_dependent
-        StatisticalTraits.supports_class_weights(::Type{<:$T}) =
-            $supports_class_weights
-        StatisticalTraits.distribution_type(::Type{<:$T}) = $distribution_type
-
-    end
-    parentmodule(T).eval(ex)
-end
-
-"""
-
-    measures_for_export()
-
-Return a list of the symbolic representation of all:
-
-- measure types (subtypes of `Aggregated` and `Unaggregated`) measure
-
-- type aliases (as defined by the constant
-  `MLJBase.MEASURE_TYPE_ALIASES`)
-
-- all built-in measure instances (as declared by `instances` trait)
-
-"""
-function measures_for_export()
-    ret = MLJBase.MEASURE_TYPE_ALIASES
-    for m in measures()
-        name = m.name |> Symbol
-        push!(ret, name)
-        for instance in m.instances
-            alias = Symbol(instance)
-            push!(ret, alias)
-        end
-    end
-    return ret
-end
diff --git a/src/measures/probabilistic.jl b/src/measures/probabilistic.jl
deleted file mode 100644
index 11c3bcdf..00000000
--- a/src/measures/probabilistic.jl
+++ /dev/null
@@ -1,423 +0,0 @@
-const DOC_DISTRIBUTIONS =
-"""
-In the case the predictions `ŷ` are continuous probability
-distributions, such as `Distributions.Normal`, replace the above sum
-with an integral, and interpret `p` as the probablity density
-function. In case of discrete distributions over the integers, such as
-`Distributions.Poisson`, sum over all integers instead of `C`.
-"""
-const WITH_L2NORM_CONTINUOUS =
-    [@eval(Distributions.$d) for d in [
-        :Chisq,
-        :Gamma,
-        :Beta,
-        :Chi,
-        :Cauchy,
-        :Normal,
-        :Uniform,
-        :Logistic,
-        :Exponential]]
-
-const WITH_L2NORM_COUNT =
-    [@eval(Distributions.$d) for d in [
-        :Poisson,
-        :DiscreteUniform,
-        :DiscreteNonParametric]]
-
-const WITH_L2NORM = vcat([UnivariateFinite, ],
-                         WITH_L2NORM_CONTINUOUS,
-                         WITH_L2NORM_COUNT)
-
-const UD = Distributions.UnivariateDistribution
-
-# ========================================================
-# AGGREGATED MEASURES
-
-# ---------------------------------------------------------
-# AreaUnderCurve
-
-# Implementation based on the Mann-Whitney U statistic.
-# see https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
-# and https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Area_under_curve_(AUC)_statistic_for_ROC_curves
-
-
-struct AreaUnderCurve <: Aggregated end
-
-metadata_measure(AreaUnderCurve;
-                 human_name = "area under the ROC",
-                 instances = ["area_under_curve", "auc"],
-                 target_scitype           = FiniteArrMissing{2},
-                 prediction_type          = :probabilistic,
-                 orientation              = :score,
-                 supports_weights         = false,
-                 distribution_type        = UnivariateFinite)
-
-const AUC = AreaUnderCurve
-@create_aliases AreaUnderCurve
-
-@create_docs(AreaUnderCurve,
-body=
-"""
-Returns the area under the ROC ([receiver operator
-characteristic](https://en.wikipedia.org/wiki/Receiver_operating_characteristic))
-
-If `missing` or `NaN` values are present, use `auc(skipinvalid(yhat, y)...)`.
-
-$INVARIANT_LABEL
-""",
-scitpye = DOC_FINITE_BINARY)
-
-# core algorithm:
-function _auc(ŷ, y)
-    lab_pos = classes(ŷ)[2] # 'positive' label
-    scores = pdf.(ŷ, lab_pos) # associated scores
-    ranks = StatsBase.tiedrank(scores)
-    n = length(y)
-    n_neg = 0  # to keep of the number of negative preds
-    T = eltype(ranks)
-    R_pos = zero(T) # sum of positive ranks
-    @inbounds for (i,j) in zip(eachindex(y), eachindex(ranks))
-        if y[i] == lab_pos
-            R_pos += ranks[j]
-        else
-            n_neg += 1
-        end
-    end
-    n_pos = n - n_neg # number of positive predictions
-    U = R_pos - T(0.5)*n_pos*(n_pos + 1) # Mann-Whitney U statistic
-    return U / (n_neg * n_pos)
-end
-
-# Missing values not supported, but allow `Missing` in eltype, because
-# `skipinvalid(yhat, y)` does not tighten the type. See doc string above.
-
-call(::AUC, ŷ, y) = _auc(ŷ, y)
-
-# ========================================================
-# UNAGGREGATED MEASURES
-
-# ---------------------------------------------------------------------
-# LogScore
-
-struct LogScore{R <: Real} <: Unaggregated
-    tol::R
-end
-LogScore(;eps=eps(), tol=eps) = LogScore(tol)
-
-metadata_measure(LogScore;
-                 instances                = ["log_score", ],
-                 target_scitype           = Union{
-                     Arr{<:Union{Missing,Multiclass}},
-                     Arr{<:Union{Missing,OrderedFactor}},
-                     Arr{<:Union{Missing,Continuous}},
-                     Arr{<:Union{Missing,Count}}},
-                 prediction_type          = :probabilistic,
-                 orientation              = :score,
-                 distribution_type        = Union{WITH_L2NORM...})
-
-@create_aliases LogScore
-
-@create_docs(LogScore,
-body=
-"""
-Since the score is undefined in the case that the true observation is
-predicted to occur with probability zero, probablities are clamped
-between `tol` and `1-tol`, where `tol` is a constructor key-word
-argument.
-
-If `p` is the predicted probability mass or density function
-corresponding to a *single* ground truth observation `η`, then the
-score for that example is
-
-    log(clamp(p(η), tol), 1 - tol)
-
-For example, for a binary target with "yes"/"no" labels, and
-predicted probability of "yes" equal to 0.8, an observation of "no"
-scores `log(0.2)`.
-
-The predictions `ŷ` should be an array of `UnivariateFinite`
-distributions in the case of `Finite` target `y`, and otherwise a
-supported `Distributions.UnivariateDistribution` such as `Normal` or
-`Poisson`.
-
-See also [`LogLoss`](@ref), which differs only in sign.
-""",
-scitype=DOC_MULTI)
-
-# for single finite observation:
-single(c::LogScore, d::UnivariateFinite, η) =
-    log(clamp(pdf(d, η), c.tol, 1 - c.tol))
-
-# for a single infinite observation:
-single(c::LogScore, d::Distributions.UnivariateDistribution, η) =
-    log(clamp(pdf(d, η), c.tol, 1 - c.tol))
-
-# to resolve method ambiguities:
-single(::LogScore, ::UnivariateFinite, ::Missing) = missing
-single(::LogScore, ::Distributions.UnivariateDistribution, ::Missing) = missing
-single(::LogScore, ::Missing, ::Missing) = missing
-
-# performant broadasting in case of UnivariateFiniteArray:
-call(c::LogScore, ŷ::UnivariateFiniteArray, y) =
-    log.(clamp.(broadcast(pdf, ŷ, y), c.tol, 1 - c.tol))
-call(c::LogScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = call(c, ŷ, y) .* w
-
-# ---------------------------------------------------------------------
-# LogLoss
-
-struct LogLoss{R <: Real} <: Unaggregated
-    tol::R
-end
-LogLoss(;eps=eps(), tol=eps) = LogLoss(tol)
-
-metadata_measure(LogLoss;
-                 instances                = ["log_loss", "cross_entropy"],
-                 target_scitype           = Union{
-                     Arr{<:Union{Missing,Multiclass}},
-                     Arr{<:Union{Missing,OrderedFactor}},
-                     Arr{<:Union{Missing,Continuous}},
-                     Arr{<:Union{Missing,Count}}},
-                 prediction_type          = :probabilistic,
-                 orientation              = :loss,
-                 distribution_type        = Union{WITH_L2NORM...})
-
-const CrossEntropy = LogLoss
-@create_aliases LogLoss
-
-@create_docs(LogLoss,
-body=
-"""
-For details, see [`LogScore`](@ref), which differs only by a sign.
-""",
-scitype=DOC_MULTI)
-
-# for single observation:
-single(c::LogLoss, d, η) = -single(LogScore(tol=c.tol), d, η)
-
-# to get performant broadasting in case of UnivariateFiniteArray:
-call(c::LogLoss, ŷ::UnivariateFiniteArray, y) =
-    -call(LogScore(tol=c.tol), ŷ, y)
-call(c::LogLoss, ŷ::UnivariateFiniteArray, y, w::AbstractArray) =
-    -call(LogScore(tol=c.tol), ŷ, y, w)
-
-
-# -----------------------------------------------------
-# BrierScore
-
-struct BrierScore <: Unaggregated end
-
-metadata_measure(BrierScore;
-                 human_name = "Brier score (a.k.a. quadratic score)",
-                 instances                = ["brier_score",],
-                 target_scitype           = Union{
-                     Arr{<:Union{Missing,Multiclass}},
-                     Arr{<:Union{Missing,OrderedFactor}},
-                     Arr{<:Union{Missing,Continuous}},
-                     Arr{<:Union{Missing,Count}}},
-                 prediction_type          = :probabilistic,
-                 orientation              = :score,
-                 distribution_type        = Union{WITH_L2NORM...})
-
-@create_aliases BrierScore
-
-@create_docs(BrierScore,
-body=
-"""
-Convention as in $PROPER_SCORING_RULES
-
-*Finite case.* If `p` is the predicted probability mass function for a
-*single* observation `η`, and `C` all possible classes, then the
-corresponding score for that observation is given by
-
-``2p(η) - \\left(\\sum_{c ∈ C} p(c)^2\\right) - 1``
-
-*Warning.* `BrierScore()` is a "score" in the sense that bigger is
-better (with `0` optimal, and all other values negative). In Brier's
-original 1950 paper, and many other places, it has the opposite sign,
-despite the name. Moreover, the present implementation does not treat
-the binary case as special, so that the score may differ in the binary
-case by a factor of two from usage elsewhere.
-
-*Infinite case.* Replacing the sum above with an integral does *not*
-lead to the formula adopted here in the case of `Continuous` or
-`Count` target `y`. Rather the convention in the paper cited above is
-adopted, which means returning a score of
-
-``2p(η) - ∫ p(t)^2 dt``
-
-in the `Continuous` case (`p` the probablity density function) or
-
-``2p(η) - ∑_t p(t)^2``
-
-in the `Count` cae (`p` the probablity mass function).
-""",
-scitype=DOC_MULTI)
-
-# calling on single finite observation:
-function single(::BrierScore,
-                d::UnivariateFinite,
-                η)
-    levels = classes(d)
-    pvec = broadcast(pdf, d, levels)
-    offset = 1 + sum(pvec.^2)
-    return 2 * pdf(d, η) - offset
-end
-
-# calling on a single infinite observation:
-single(::BrierScore, d::Distributions.UnivariateDistribution, η) =
-    2*pdf(d, η) - Distributions.pdfsquaredL2norm(d)
-
-# To get performant broadcasted version in case of UnivariateFiniteArray:
-function call(
-    ::BrierScore,
-    ŷ::UnivariateFiniteArray,
-    y
-    )
-
-    probs = pdf(ŷ, classes(first(ŷ)))
-    offset = 1 .+ vec(sum(probs.^2, dims=2))
-
-    2 .* broadcast(pdf, ŷ, y) .- offset
-end
-call(m::BrierScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) = call(m, ŷ, y) .* w
-
-
-# -----------------------------------------------------
-# BrierLoss
-
-struct BrierLoss <: Unaggregated end
-
-metadata_measure(BrierLoss;
-                 human_name = "Brier loss (a.k.a. quadratic loss)",
-                 instances                = ["brier_loss",],
-                 target_scitype           = Union{
-                     Arr{<:Union{Missing,Multiclass}},
-                     Arr{<:Union{Missing,OrderedFactor}},
-                     Arr{<:Union{Missing,Continuous}},
-                     Arr{<:Union{Missing,Count}}},
-                 prediction_type          = :probabilistic,
-                 orientation              = :loss,
-                 distribution_type        = Union{WITH_L2NORM...})
-
-@create_aliases BrierLoss
-
-@create_docs(BrierLoss,
-body=
-"""
-For details, see [`BrierScore`](@ref), which differs only by a sign.
-""",
-scitype=DOC_MULTI)
-
-# calling on single observation:
-single(::BrierLoss, d, η) = - single(BrierScore(), d, η)
-
-# to get performant broadcasting in case of UnivariateFiniteArray:
-call(m::BrierLoss, ŷ::UnivariateFiniteArray, y) =
-    -call(BrierScore(), ŷ, y)
-call(m::BrierLoss, ŷ::UnivariateFiniteArray, y, w::AbstractArray) =
-    -call(BrierScore(), ŷ, y, w)
-
-
-# -----------------------------------------------------
-# SphericalScore
-
-struct SphericalScore{T<:Real} <: Unaggregated
-    alpha::T
-end
-SphericalScore(; alpha=2) = SphericalScore(alpha)
-
-metadata_measure(SphericalScore;
-                 human_name               = "Spherical score",
-                 instances                = ["spherical_score",],
-                 target_scitype           = Union{
-                     Arr{<:Union{Missing,Multiclass}},
-                     Arr{<:Union{Missing,OrderedFactor}},
-                     Arr{<:Union{Missing,Continuous}},
-                     Arr{<:Union{Missing,Count}}},
-                 prediction_type          = :probabilistic,
-                 orientation              = :score,
-                 distribution_type        = Union{WITH_L2NORM...})
-
-@create_aliases SphericalScore
-
-@create_docs(SphericalScore,
-body=
-"""
-Convention as in $PROPER_SCORING_RULES: If `η` takes on a finite
-number of classes `C` and ``p(η)` is the predicted probability for a
-*single* observation `η`, then the corresponding score for that
-observation is given by
-
-``p(y)^α / \\left(\\sum_{η ∈ C} p(η)^α\\right)^{1-α} - 1``
-
-where `α` is the measure parameter `alpha`.
-
-$DOC_DISTRIBUTIONS
-
-""",
-scitype=DOC_MULTI)
-
-# calling on single observations:
-function single(s::SphericalScore, d::UnivariateFinite, η)
-    α = s.alpha
-    levels = classes(d)
-    pvec = broadcast(pdf, d, levels)
-    return (pdf(d, η)/norm(pvec, α))^(α - 1)
-end
-
-single(s::SphericalScore, d::Distributions.UnivariateDistribution, η) =
-    pdf(d, η)/sqrt(Distributions.pdfsquaredL2norm(d))
-
-# to compute the α-norm along last dimension:
-_norm(A::AbstractArray{<:Any,N}, α) where N =
-    sum(x -> x^α, A, dims=N).^(1/α)
-
-# To get performant version in case of UnivariateFiniteArray:
-function call(
-    s::SphericalScore,
-    ŷ::UnivariateFiniteArray,
-    y
-    )
-    α = s.alpha
-    alphanorm(A) = _norm(A, α)
-
-    predicted_probs = pdf(ŷ, classes(first(ŷ)))
-
-    (broadcast(pdf, ŷ, y) ./ alphanorm(predicted_probs)).^(α - 1)
-end
-call(s::SphericalScore, ŷ::UnivariateFiniteArray, y, w::AbstractArray) =
-    call(s, ŷ, y) .* w
-
-
-# ---------------------------------------------------------------------------
-# Extra check for L2 norm based proper scoring rules
-
-err_l2_norm(m) = ArgumentError(
-    "Distribution not supported by $m. "*
-    "Supported distributions are "*
-    join(string.(map(s->"`$s`", WITH_L2NORM)), ", ", ", and "))
-
-const ERR_UNSUPPORTED_ALPHA = ArgumentError(
-    "Only `alpha = 2` is supported, unless scoring a `Finite` target. ")
-
-# not for export:
-const L2ProperScoringRules = Union{LogScore,
-                                   LogLoss,
-                                   BrierScore,
-                                   BrierLoss,
-                                   SphericalScore}
-
-function extra_check(measure::L2ProperScoringRules, yhat, args...)
-
-    D = nonmissing(eltype(yhat))
-    D <: Distributions.Distribution || D <: UnivariateFinite ||
-        (D = typeof(findfirst(x->!isinvalid(x), yhat)))
-    D <: Union{Nothing, WITH_L2NORM...} ||
-        throw(err_l2_norm(measure))
-
-    if measure isa SphericalScore
-        measure.alpha == 2 || throw(ERR_UNSUPPORTED_ALPHA)
-    end
-
-    return nothing
-end
diff --git a/src/measures/roc.jl b/src/measures/roc.jl
deleted file mode 100644
index 8614b00e..00000000
--- a/src/measures/roc.jl
+++ /dev/null
@@ -1,91 +0,0 @@
-## ROC COMPUTATION
-
-"""
-    _idx_unique_sorted(v)
-
-Internal function to return the index of unique elements in `v` under the
-assumption that the vector `v` is sorted in decreasing order.
-"""
-function _idx_unique_sorted(v::Vec{<:Real})
-    n    = length(v)
-    idx  = ones(Int, n)
-    p, h = 1, 1
-    cur  = v[1]
-    @inbounds while h < n
-        h     += 1                  # head position
-        cand   = v[h]               # candidate value
-        cand   < cur || continue    # is it new? otherwise skip
-        p     += 1                  # if new store it
-        idx[p] = h
-        cur    = cand               # and update the last seen value
-    end
-    p < n && deleteat!(idx, p+1:n)
-    return idx
-end
-
-"""
-    fprs, tprs, ts = roc_curve(ŷ, y) = roc(ŷ, y)
-
-Return the ROC curve for a two-class probabilistic prediction `ŷ` given the
-ground  truth `y`. The true positive rates, false positive rates over a range
-of thresholds `ts` are returned. Note that if there are `k` unique scores,
-there are correspondingly  `k` thresholds and `k+1` "bins" over which the FPR
-and TPR are constant:
-
-* `[0.0 - thresh[1]]`
-* `[thresh[1] - thresh[2]]`
-* ...
-* `[thresh[k] - 1]`
-
-consequently, `tprs` and `fprs` are of length `k+1` if `ts` is of length `k`.
-
-To draw the curve using your favorite plotting backend, do `plot(fprs, tprs)`.
-"""
-function roc_curve(ŷm, ym)
-    ŷ, y    = skipinvalid(ŷm, ym)
-    length(classes(ŷ)) ==  2 || throw(
-        ArgumentError("`ŷ` must be a two-class probabilistic prediction")
-    )
-    length(levels(y)) == 2 || throw(
-        ArgumentError("`y` must be a categorical vector with two-levels.")
-    )
-    n       = length(y)
-    lab_pos = levels(y)[2]
-    scores  = pdf.(ŷ, lab_pos)
-    ranking = sortperm(scores, rev=true)
-
-    scores_sort = scores[ranking]
-    y_sort_bin  = (y[ranking] .== lab_pos)
-
-    idx_unique = _idx_unique_sorted(scores_sort)
-    thresholds = scores_sort[idx_unique]
-
-    # detailed computations with example:
-    # y = [  1   0   0   1   0   0   1]
-    # s = [0.5 0.5 0.2 0.2 0.1 0.1 0.1] thresh are 0.5 0.2, 0.1 // idx [1, 3, 5]
-    # ŷ = [  0   0   0   0   0   0   0] (0.5 - 1.0] # no pos pred
-    # ŷ = [  1   1   0   0   0   0   0] (0.2 - 0.5] # 2 pos pred
-    # ŷ = [  1   1   1   1   0   0   0] (0.1 - 0.2] # 4 pos pred
-    # ŷ = [  1   1   1   1   1   1   1] [0.0 - 0.1] # all pos pre
-
-    idx_unique_2 = idx_unique[2:end]   # [3, 5]
-    n_ŷ_pos      = idx_unique_2 .- 1   # [2, 4] implicit [0, 2, 4, 7]
-
-    cs   = cumsum(y_sort_bin)          # [1, 1, 1, 2, 2, 2, 3]
-    n_tp = cs[n_ŷ_pos]                 # [1, 2] implicit [0, 1, 2, 3]
-    n_fp = n_ŷ_pos .- n_tp             # [1, 2] implicit [0, 1, 2, 4]
-
-    # add end points
-    P = sum(y_sort_bin) # total number of true positives
-    N = n - P           # total number of true negatives
-
-    n_tp = [0, n_tp..., P] # [0, 1, 2, 3]
-    n_fp = [0, n_fp..., N] # [0, 1, 2, 4]
-
-    tprs = n_tp ./ P  # [0/3, 1/3, 2/3, 1]
-    fprs = n_fp ./ N  # [0/4, 1/4, 2/4, 1]
-
-    return fprs, tprs, thresholds
-end
-
-const roc = roc_curve
diff --git a/src/operations.jl b/src/operations.jl
index efa275ac..9fab3999 100644
--- a/src/operations.jl
+++ b/src/operations.jl
@@ -168,43 +168,7 @@ const err_unsupported_operation(operation) = ErrorException(
     "network machine that does not support it. "
 )
 
-## SURROGATE AND COMPOSITE MODELS
-
-
-for operation in [:predict,
-                  :predict_joint,
-                  :transform,
-                  :inverse_transform]
-    ex = quote
-        function $operation(model::Union{Composite,Surrogate}, fitresult,X)
-            if hasproperty(fitresult, $(QuoteNode(operation)))
-                return fitresult.$operation(X)
-            else
-                throw(err_unsupported_operation($operation))
-            end
-        end
-    end
-    eval(ex)
-end
-
-for (operation, fallback) in [(:predict_mode, :mode),
-                              (:predict_mean, :mean),
-                              (:predict_median, :median)]
-    ex = quote
-        function $(operation)(m::Union{ProbabilisticComposite,ProbabilisticSurrogate},
-                              fitresult,
-                              Xnew)
-            if hasproperty(fitresult, $(QuoteNode(operation)))
-                return fitresult.$(operation)(Xnew)
-            end
-            return $(fallback).(predict(m, fitresult, Xnew))
-        end
-    end
-    eval(ex)
-end
-
-
-## NETWORKCOMPOSITE MODELS
+## NETWORK COMPOSITE MODELS
 
 # In the case of `NetworkComposite` models, the `fitresult` is a learning network
 # signature. If we call a node in the signature (eg, do `fitresult.predict()`) then we may
@@ -222,9 +186,9 @@ for operation in [:predict,
                   :transform,
                   :inverse_transform]
     quote
-        function $operation(model::NetworkComposite, fitresult, Xnew)
+        function $operation(model::NetworkComposite, fitresult, Xnew...)
             if $(QuoteNode(operation)) in MLJBase.operations(fitresult)
-                return output_and_report(fitresult, $(QuoteNode(operation)), Xnew)
+                return output_and_report(fitresult, $(QuoteNode(operation)), Xnew...)
             end
             throw(err_unsupported_operation($operation))
         end
@@ -242,7 +206,7 @@ for (operation, fallback) in [(:predict_mode, :mode),
                 return output_and_report(fitresult, $(QuoteNode(operation)), Xnew)
             end
             # The following line retuns a `Tuple` since `m` is a `NetworkComposite`
-            predictions, report = predict(m, fitresult, Xnew) 
+            predictions, report = predict(m, fitresult, Xnew)
             return $(fallback).(predictions), report
         end
     end |> eval
diff --git a/src/resampling.jl b/src/resampling.jl
index 43483cc3..6b055951 100644
--- a/src/resampling.jl
+++ b/src/resampling.jl
@@ -14,8 +14,6 @@ const PREDICT_OPERATIONS_STRING = begin
     join(strings, ", ", ", or ")
 end
 const PROG_METER_DT = 0.1
-const ERR_WEIGHTS_REAL =
-    ArgumentError("`weights` must be a `Real` vector. ")
 const ERR_WEIGHTS_LENGTH =
     DimensionMismatch("`weights` and target "*
                       "have different lengths. ")
@@ -32,19 +30,41 @@ const ERR_INVALID_OPERATION = ArgumentError(
     "Invalid `operation` or `operations`. "*
     "An operation must be one of these: $PREDICT_OPERATIONS_STRING. ")
 _ambiguous_operation(model, measure) =
-    "`prediction_type($measure) == $(prediction_type(measure))` but "*
-    "`prediction_type($model) == $(prediction_type(model))`."
+    "`$measure` does not support a `model` with "*
+    "`prediction_type(model) == :$(prediction_type(model))`. "
 err_ambiguous_operation(model, measure) = ArgumentError(
     _ambiguous_operation(model, measure)*
-    "\nUnable to deduce an appropriate operation for $measure. "*
+    "\nUnable to infer an appropriate operation for `$measure`. "*
     "Explicitly specify `operation=...` or `operations=...`. ")
 err_incompatible_prediction_types(model, measure) = ArgumentError(
     _ambiguous_operation(model, measure)*
-    "If your model really is making probabilistic predictions, try explicitly "*
+    "If your model is truly making probabilistic predictions, try explicitly "*
     "specifiying operations. For example, for "*
     "`measures = [area_under_curve, accuracy]`, try "*
     "`operations=[predict, predict_mode]`. ")
-
+const LOG_AVOID = "\nTo override measure checks, set check_measure=false. "
+const LOG_SUGGESTION1 =
+    "\nPerhaps you want to set `operation="*
+    "predict_mode` or need to "*
+    "specify multiple operations, "*
+    "one for each measure. "
+const LOG_SUGGESTION2 =
+    "\nPerhaps you want to set `operation="*
+    "predict_mean` or `operation=predict_median`, or "*
+    "specify multiple operations, "*
+    "one for each measure. "
+ERR_MEASURES_OBSERVATION_SCITYPE(measure, T_measure, T) = ArgumentError(
+    "\nobservation scitype of target = `$T` but ($measure) only supports "*
+        "`$T_measure`."*LOG_AVOID
+)
+ERR_MEASURES_PROBABILISTIC(measure, suggestion) = ArgumentError(
+    "The model subtypes `Probabilistic`, and so is not supported by "*
+        "`$measure`. $suggestion"*LOG_AVOID
+)
+ERR_MEASURES_DETERMINISTIC(measure) = ArgumentError(
+    "The model subtypes `Deterministic`, "*
+        "and so is not supported by `$measure`. "*LOG_AVOID
+)
 
 # ==================================================================
 ## MODEL TYPES THAT CAN BE EVALUATED
@@ -345,7 +365,7 @@ For example, if you run `replace!(y, 'a' => 'b', 'b' => 'a')` and then re-run
 `train_test_pairs`, the returned `(train, test)` pairs will be the same.
 
 Pre-shuffling of `rows` is controlled by `rng` and `shuffle`. If `rng`
-is an integer, then the `StratifedCV` keyword constructor resets it to
+is an integer, then the `StratifiedCV` keyword constructor resets it to
 `MersenneTwister(rng)`. Otherwise some `AbstractRNG` object is
 expected.
 
@@ -448,72 +468,68 @@ end
 """
     PerformanceEvaluation
 
-Type of object returned by [`evaluate`](@ref) (for models plus data)
-or [`evaluate!`](@ref) (for machines). Such objects encode estimates
-of the performance (generalization error) of a supervised model or
-outlier detection model.
-
-When `evaluate`/`evaluate!` is called, a number of train/test pairs
-("folds") of row indices are generated, according to the options
-provided, which are discussed in the [`evaluate!`](@ref)
-doc-string. Rows correspond to observations. The generated train/test
-pairs are recorded in the `train_test_rows` field of the
-`PerformanceEvaluation` struct, and the corresponding estimates,
-aggregated over all train/test pairs, are recorded in `measurement`, a
-vector with one entry for each measure (metric) recorded in `measure`.
-
-When displayed, a `PerformanceEvalution` object includes a value under
-the heading `1.96*SE`, derived from the standard error of the `per_fold`
-entries. This value is suitable for constructing a formal 95%
-confidence interval for the given `measurement`. Such intervals should
-be interpreted with caution. See, for example, Bates et al.
-[(2021)](https://arxiv.org/abs/2104.00673).
+Type of object returned by [`evaluate`](@ref) (for models plus data) or
+[`evaluate!`](@ref) (for machines). Such objects encode estimates of the performance
+(generalization error) of a supervised model or outlier detection model.
+
+When `evaluate`/`evaluate!` is called, a number of train/test pairs ("folds") of row
+indices are generated, according to the options provided, which are discussed in the
+[`evaluate!`](@ref) doc-string. Rows correspond to observations. The generated train/test
+pairs are recorded in the `train_test_rows` field of the `PerformanceEvaluation` struct,
+and the corresponding estimates, aggregated over all train/test pairs, are recorded in
+`measurement`, a vector with one entry for each measure (metric) recorded in `measure`.
+
+When displayed, a `PerformanceEvaluation` object includes a value under the heading
+`1.96*SE`, derived from the standard error of the `per_fold` entries. This value is
+suitable for constructing a formal 95% confidence interval for the given
+`measurement`. Such intervals should be interpreted with caution. See, for example, Bates
+et al.  [(2021)](https://arxiv.org/abs/2104.00673).
 
 ### Fields
 
-These fields are part of the public API of the `PerformanceEvaluation`
-struct.
+These fields are part of the public API of the `PerformanceEvaluation` struct.
 
 - `model`: model used to create the performance evaluation. In the case a
     tuning model, this is the best model found.
 
 - `measure`: vector of measures (metrics) used to evaluate performance
 
-- `measurement`: vector of measurements - one for each element of
-  `measure` - aggregating the performance measurements over all
-  train/test pairs (folds). The aggregation method applied for a given
-  measure `m` is `aggregation(m)` (commonly `Mean` or `Sum`)
+- `measurement`: vector of measurements - one for each element of `measure` - aggregating
+  the performance measurements over all train/test pairs (folds). The aggregation method
+  applied for a given measure `m` is
+  `StatisticalMeasuresBase.external_aggregation_mode(m)` (commonly `Mean()` or `Sum()`)
 
-- `operation` (e.g., `predict_mode`): the operations applied for each
-  measure to generate predictions to be evaluated. Possibilities are:
-  $PREDICT_OPERATIONS_STRING.
+- `operation` (e.g., `predict_mode`): the operations applied for each measure to generate
+  predictions to be evaluated. Possibilities are: $PREDICT_OPERATIONS_STRING.
 
-- `per_fold`: a vector of vectors of individual test fold evaluations
-  (one vector per measure). Useful for obtaining a rough estimate of
-  the variance of the performance estimate.
+- `per_fold`: a vector of vectors of individual test fold evaluations (one vector per
+  measure). Useful for obtaining a rough estimate of the variance of the performance
+  estimate.
 
-- `per_observation`: a vector of vectors of individual observation
-  evaluations of those measures for which
-  `reports_each_observation(measure)` is true, which is otherwise
-  reported `missing`. Useful for some forms of hyper-parameter
-  optimization.
+- `per_observation`: a vector of vectors of vectors containing individual per-observation
+  measurements: for an evaluation `e`, `e.per_observation[m][f][i]` is the measurement for
+  the `i`th observation in the `f`th test fold, evaluated using the `m`th measure.  Useful
+  for some forms of hyper-parameter optimization. Note that an aggregated measurement
+  for some measure `measure` is repeated across all observations in a fold if
+  `StatisticalMeasures.can_report_unaggregated(measure) == false`. If `e` has been computed
+  with the `per_observation=false` option, then `e.per_observation` is a vector of
+  `missing`s.
 
-- `fitted_params_per_fold`: a vector containing `fitted params(mach)`
-  for each machine `mach` trained during resampling - one machine per
-  train/test pair. Use this to extract the learned parameters for each
-  individual training event.
+- `fitted_params_per_fold`: a vector containing `fitted_params(mach)` for each machine
+  `mach` trained during resampling - one machine per train/test pair. Use this to extract
+  the learned parameters for each individual training event.
 
-- `report_per_fold`: a vector containing `report(mach)` for each
-  machine `mach` training in resampling - one machine per train/test
-  pair.
+- `report_per_fold`: a vector containing `report(mach)` for each machine `mach` trained
+  in resampling - one machine per train/test pair.
 
-- `train_test_rows`: a vector of tuples, each of the form `(train, test)`,
-  where `train` and `test` are vectors of row (observation) indices for
-  training and evaluation respectively.
+- `train_test_rows`: a vector of tuples, each of the form `(train, test)`, where `train`
+  and `test` are vectors of row (observation) indices for training and evaluation
+  respectively.
 
 - `resampling`: the resampling strategy used to generate the train/test pairs.
 
 - `repeats`: the number of times the resampling strategy was repeated.
+
 """
 struct PerformanceEvaluation{M,
                              Measure,
@@ -617,48 +633,37 @@ end
 
 function _check_measure(measure, operation, model, y)
 
-    T = scitype(y)
+    # get observation scitype:
+    T = MLJBase.guess_observation_scitype(y)
+
+    # get type supported by measure:
+    T_measure = StatisticalMeasuresBase.observation_scitype(measure)
 
     T == Unknown && (return true)
-    target_scitype(measure) == Unknown && (return true)
-    prediction_type(measure) == :unknown && (return true)
+    T_measure == Union{} && (return true)
+    isnothing(StatisticalMeasuresBase.kind_of_proxy(measure)) && (return true)
 
-    avoid = "\nTo override measure checks, set check_measure=false. "
 
-    T <: target_scitype(measure) ||
-        throw(ArgumentError(
-            "\nscitype of target = $T but target_scitype($measure) = "*
-            "$(target_scitype(measure))."*avoid))
+    T <: T_measure || throw(ERR_MEASURES_OBSERVATION_SCITYPE(measure, T_measure, T))
 
     incompatible = model isa Probabilistic &&
         operation == predict &&
-        prediction_type(measure) != :probabilistic
+        StatisticalMeasuresBase.kind_of_proxy(measure) != LearnAPI.Distribution()
 
     if incompatible
-        if target_scitype(measure) <:
-            AbstractVector{<:Union{Missing,Finite}}
-            suggestion = "\nPerhaps you want to set `operation="*
-                "predict_mode` or need to "*
-                "specify multiple operations, "*
-                "one for each measure. "
-        elseif target_scitype(measure) <:
-            AbstractVector{<:Union{Missing,Continuous}}
-            suggestion = "\nPerhaps you want to set `operation="*
-                "predict_mean` or `operation=predict_median`, or "*
-                "specify multiple operations, "*
-                "one for each measure. "
+        if T <: Union{Missing,Finite}
+            suggestion = LOG_SUGGESTION1
+        elseif T <: Union{Missing,Infinite}
+            suggestion = LOG_SUGGESTION2
         else
             suggestion = ""
         end
-        throw(ArgumentError(
-            "\n$model <: Probabilistic but prediction_type($measure) = "*
-            ":$(prediction_type(measure)). "*suggestion*avoid))
+        throw(ERR_MEASURES_PROBABILISTIC(measure, suggestion))
     end
 
-    model isa Deterministic && prediction_type(measure) != :deterministic &&
-        throw(ArgumentError("$model <: Deterministic but "*
-                            "prediction_type($measure) ="*
-              ":$(prediction_type(measure))."*avoid))
+    model isa Deterministic &&
+        StatisticalMeasuresBase.kind_of_proxy(measure) != LearnAPI.LiteralTarget() &&
+        throw(ERR_MEASURES_DETERMINISTIC(measure))
 
     return true
 
@@ -682,13 +687,14 @@ function _actual_measures(measures, model)
         _measures = measures
     end
 
-    return _measures
+    # wrap in `robust_measure` to allow unsupported weights to be silently treated as
+    # uniform when invoked; `_check_measure` will throw appropriate warnings unless
+    # explicitly suppressed.
+    return StatisticalMeasuresBase.robust_measure.(_measures)
 
 end
 
 function _check_weights(weights, nrows)
-    weights isa AbstractVector{<:Real} ||
-        throw(ERR_WEIGHTS_REAL)
     length(weights) == nrows ||
         throw(ERR_WEIGHTS_LENGTH)
     return true
@@ -741,21 +747,35 @@ function _actual_operations(operation::Nothing,
                             verbosity)
     map(measures) do m
 
-        prediction_type = MLJBase.prediction_type(m)
-        target_scitype = MLJBase.target_scitype(m)
+        # `kind_of_proxy` is the measure trait corresponding to `prediction_type` model
+        # trait. But its values are instances of LearnAPI.KindOfProxy, instead of
+        # symbols:
+        #
+        # `LearnAPI.LiteralTarget()` ~ `:deterministic` (`model isa Deterministic`)
+        # `LearnAPI.Distribution()` ~ `:probabilistic` (`model isa Probabilistic`)
+        #
+        kind_of_proxy = StatisticalMeasuresBase.kind_of_proxy(m)
 
-        if prediction_type === :unknown
-            return predict
-        end
+        # `observation_scitype` is the measure trait which we need to match the model
+        # `target_scitype` but the latter refers to the whole target `y`, not a single
+        # observation.
+        #
+        # One day, models will have their own `observation_scitype`
+        observation_scitype = StatisticalMeasuresBase.observation_scitype(m)
+
+        # One day, models will implement LearnAPI and will get their own `kind_of_proxy`
+        # trait replacing `prediction_type` and `observation_scitype` trait replacing
+        # `target_scitype`.
+
+        isnothing(kind_of_proxy) && (return predict)
 
         if MLJBase.prediction_type(model) === :probabilistic
-            if prediction_type === :probabilistic
+            if kind_of_proxy === LearnAPI.Distribution()
                 return predict
-            elseif prediction_type === :deterministic
-                if target_scitype <: AbstractArray{<:Union{Missing,Finite}}
+            elseif kind_of_proxy === LearnAPI.LiteralTarget()
+                if observation_scitype <: Union{Missing,Finite}
                     return predict_mode
-                elseif target_scitype <:
-                    AbstractArray{<:Union{Missing,Continuous,Count}}
+                elseif observation_scitype <:Union{Missing,Infinite}
                     return predict_mean
                 else
                     throw(err_ambiguous_operation(model, m))
@@ -764,19 +784,21 @@ function _actual_operations(operation::Nothing,
                 throw(err_ambiguous_operation(model, m))
             end
         elseif MLJBase.prediction_type(model) === :deterministic
-            if prediction_type === :probabilistic
+            if kind_of_proxy === LearnAPI.Distribution()
                 throw(err_incompatible_prediction_types(model, m))
-            elseif prediction_type === :deterministic
+            elseif kind_of_proxy === LearnAPI.LiteralTarget()
                 return predict
             else
                 throw(err_ambiguous_operation(model, m))
             end
-        else
-            if prediction_type === :interval
+        elseif MLJBase.prediction_type(model) === :interval
+            if kind_of_proxy === LearnAPI.ConfidenceInterval()
                 return predict
             else
                 throw(err_ambiguous_operation(model, m))
             end
+        else
+            throw(err_ambiguous_operation(model, m))
         end
     end
 end
@@ -820,158 +842,123 @@ _process_accel_settings(accel) =  throw(ArgumentError("unsupported" *
 # --------------------------------------------------------------
 # User interface points: `evaluate!` and `evaluate`
 
+const RESAMPLING_STRATEGIES = subtypes(ResamplingStrategy)
+const RESAMPLING_STRATEGIES_LIST =
+    join(
+        map(RESAMPLING_STRATEGIES) do s
+             name = split(string(s), ".") |> last
+             "`$name`"
+        end,
+        ", ",
+        " and ",
+    )
+
 """
     log_evaluation(logger, performance_evaluation)
-Log a performance evaluation to `logger`, an object specific to some logging
-platform, such as mlflow. If `logger=nothing` then no logging is performed.
-The method is called at the end of every call to `evaluate/evaluate!` using
-the logger provided by the `logger` keyword argument.
+
+Log a performance evaluation to `logger`, an object specific to some logging platform,
+such as mlflow. If `logger=nothing` then no logging is performed.  The method is called at
+the end of every call to `evaluate/evaluate!` using the logger provided by the `logger`
+keyword argument.
+
 # Implementations for new logging platforms
-#
-Julia interfaces to workflow logging platforms, such as mlflow (provided by
-the MLFlowClient.jl interface) should overload
-`log_evaluation(logger::LoggerType, performance_evaluation)`,
-where `LoggerType` is a platform-specific type for logger objects. For an
-example, see the implementation provided by the MLJFlow.jl package.
+
+Julia interfaces to workflow logging platforms, such as mlflow (provided by the
+MLFlowClient.jl interface) should overload `log_evaluation(logger::LoggerType,
+performance_evaluation)`, where `LoggerType` is a platform-specific type for logger
+objects. For an example, see the implementation provided by the MLJFlow.jl package.
+
 """
 log_evaluation(logger, performance_evaluation) = nothing
 
 """
-    evaluate!(mach,
-              resampling=CV(),
-              measure=nothing,
-              rows=nothing,
-              weights=nothing,
-              class_weights=nothing,
-              operation=nothing,
-              repeats=1,
-              acceleration=default_resource(),
-              force=false,
-              verbosity=1,
-              check_measure=true,
-              logger=nothing)
-
-Estimate the performance of a machine `mach` wrapping a supervised
-model in data, using the specified `resampling` strategy (defaulting
-to 6-fold cross-validation) and `measure`, which can be a single
-measure or vector.
-
-Do `subtypes(MLJ.ResamplingStrategy)` to obtain a list of available
-resampling strategies. If `resampling` is not an object of type
-`MLJ.ResamplingStrategy`, then a vector of tuples (of the form
-`(train_rows, test_rows)` is expected. For example, setting
+    evaluate!(mach; resampling=CV(), measure=nothing, options...)
+
+Estimate the performance of a machine `mach` wrapping a supervised model in data, using
+the specified `resampling` strategy (defaulting to 6-fold cross-validation) and `measure`,
+which can be a single measure or vector. Returns a [`PerformanceEvaluation`](@ref)
+object.
+
+Available resampling strategies are $RESAMPLING_STRATEGIES_LIST. If `resampling` is not an
+instance of one of these, then a vector of tuples of the form `(train_rows, test_rows)`
+is expected. For example, setting
 
     resampling = [((1:100), (101:200)),
                    ((101:200), (1:100))]
 
 gives two-fold cross-validation using the first 200 rows of data.
 
-The type of operation (`predict`, `predict_mode`, etc) to be
-associated with `measure` is automatically inferred from measure
-traits where possible. For example, `predict_mode` will be used for a
-`Multiclass` target, if `model` is probabilistic but `measure` is
-deterministic. The operations applied can be inspected from the
-`operation` field of the object returned. Alternatively, operations
-can be explicitly specified using `operation=...`. If `measure` is a
-vector, then `operation` must be a single operation, which will be
-associated with all measures, or a vector of the same length as
-`measure`.
-
-The resampling strategy is applied repeatedly (Monte Carlo resampling)
-if `repeats > 1`. For example, if `repeats = 10`, then `resampling =
-CV(nfolds=5, shuffle=true)`, generates a total of 50 `(train, test)`
-pairs for evaluation and subsequent aggregation.
+Any measure conforming to the
+[StatisticalMeasuresBase.jl](https://juliaai.github.io/StatisticalMeasuresBase.jl/dev/)
+API can be provided, assuming it can consume multiple observations.
 
-If `resampling isa MLJ.ResamplingStrategy` then one may optionally
-restrict the data used in evaluation by specifying `rows`.
+Although `evaluate!` is mutating, `mach.model` and `mach.args` are not mutated.
 
-An optional `weights` vector may be passed for measures that support
-sample weights (`MLJ.supports_weights(measure) == true`), which is
-ignored by those that don't. These weights are not to be confused with
-any weights `w` bound to `mach` (as in `mach = machine(model, X,
-y, w)`). To pass these to the performance evaluation measures you must
-explictly specify `weights=w` in the `evaluate!` call.
+# Additional keyword options
 
-Additionally, optional `class_weights` dictionary may be passed
-for measures that support class weights
-(`MLJ.supports_class_weights(measure) == true`), which is
-ignored by those that don't. These weights are not to be confused with
-any weights `class_w` bound to `mach` (as in `mach = machine(model, X,
-y, class_w)`). To pass these to the performance evaluation measures you
-must explictly specify `class_weights=w` in the `evaluate!` call.
+- `rows` - vector of observation indices from which both train and test folds are
+  constructed (default is all observations)
 
-User-defined measures are supported; see the manual for details.
+- `operation`/`operations=nothing` - One of $PREDICT_OPERATIONS_STRING, or a vector of
+  these of the same length as `measure`/`measures`. Automatically inferred if left
+  unspecified. For example, `predict_mode` will be used for a `Multiclass` target, if
+  `model` is a probabilistic predictor, but `measure` expects literal (point) target
+  predictions. Operations actually applied can be inspected from the `operation` field of
+  the object returned.
 
-If no measure is specified, then `default_measure(mach.model)` is
-used, unless this default is `nothing` and an error is thrown.
+- `weights` - per-sample `Real` weights for measures that support them (not to be confused
+  with weights used in training, such as the `w` in `mach = machine(model, X, y, w)`).
 
-The `acceleration` keyword argument is used to specify the compute resource (a
-subtype of `ComputationalResources.AbstractResource`) that will be used to
-accelerate/parallelize the resampling operation.
+- `class_weights` - dictionary of `Real` per-class weights for use with measures that
+  support these, in classification problems (not to be confused
+  with weights used in training, such as the `w` in `mach = machine(model, X, y, w)`).
 
-Although `evaluate!` is mutating, `mach.model` and `mach.args` are
-untouched.
+- `repeats::Int=1`: set to a higher value for repeated (Monte Carlo)
+  resampling. For example, if `repeats = 10`, then `resampling = CV(nfolds=5,
+  shuffle=true)`, generates a total of 50 `(train, test)` pairs for evaluation and
+  subsequent aggregation.
 
-### Summary of key-word arguments
+- `acceleration=CPU1()`: acceleration/parallelization option; can be any instance of
+  `CPU1`, (single-threaded computation), `CPUThreads` (multi-threaded computation) or
+  `CPUProcesses` (multi-process computation); default is `default_resource()`. These types
+  are owned by ComputationalResources.jl.
 
-- `resampling` - resampling strategy (default is `CV(nfolds=6)`)
-
-- `measure`/`measures` - measure or vector of measures (losses, scores, etc)
-
-- `rows` - vector of observation indices from which both train and
-  test folds are constructed (default is all observations)
-
-- `weights` - per-sample weights for measures that support them (not
-  to be confused with weights used in training)
-
-- `class_weights` - dictionary of per-class weights for use with
-  measures that support these, in classification problems (not to be
-  confused with per-sample `weights` or with class weights used in
-  training)
-
-- `operation`/`operations` - One of $PREDICT_OPERATIONS_STRING, or a
-  vector of these of the same length as
-  `measure`/`measures`. Automatically inferred if left unspecified.
-
-- `repeats` - default is 1; set to a higher value for repeated
-  (Monte Carlo) resampling
-
-- `acceleration` - parallelization option; currently supported
-  options are instances of `CPU1` (single-threaded computation)
-  `CPUThreads` (multi-threaded computation) and `CPUProcesses`
-  (multi-process computation); default is `default_resource()`.
-
-- `force` - default is `false`; set to `true` for force cold-restart
+- `force=false`: set to `true` to force cold-restart
   of each training event
 
-- `verbosity` level, an integer defaulting to 1.
-
-- `check_measure` - default is `true`
+- `verbosity::Int=1`: logging level; can be negative
 
-- `logger` - a logger object (see [`MLJBase.log_evaluation`](@ref))
+- `check_measure=true`: whether to screen measures for possible incompatibility with the
+  model. Will not catch all incompatibilities.
 
+- `per_observation=true`: whether to calculate estimates for individual observations; if
+  `false` the `per_observation` field of the returned object is populated with
+  `missing`s. Setting to `false` may reduce compute time and allocations.
 
-### Return value
+- `logger` - a logger object (see [`MLJBase.log_evaluation`](@ref))
 
-A [`PerformanceEvaluation`](@ref) object. See
-[`PerformanceEvaluation`](@ref) for details.
+See also [`evaluate`](@ref), [`PerformanceEvaluation`](@ref)
 
 """
-function evaluate!(mach::Machine{<:Measurable};
-                   resampling=CV(),
-                   measures=nothing,
-                   measure=measures,
-                   weights=nothing,
-                   class_weights=nothing,
-                   operations=nothing,
-                   operation=operations,
-                   acceleration=default_resource(),
-                   rows=nothing,
-                   repeats=1,
-                   force=false,
-                   check_measure=true,
-                   verbosity=1,
-                   logger=nothing)
+
+function evaluate!(
+    mach::Machine{<:Measurable};
+    resampling=CV(),
+    measures=nothing,
+    measure=measures,
+    weights=nothing,
+    class_weights=nothing,
+    operations=nothing,
+    operation=operations,
+    acceleration=default_resource(),
+    rows=nothing,
+    repeats=1,
+    force=false,
+    check_measure=true,
+    per_observation=true,
+    verbosity=1,
+    logger=nothing,
+    )
 
     # this method just checks validity of options, preprocess the
     # weights, measures, operations, and dispatches a
@@ -1005,26 +992,52 @@ function evaluate!(mach::Machine{<:Measurable};
                             verbosity,
                             check_measure)
 
-    _warn_about_unsupported(supports_weights,
-                            "Sample", _measures, weights, verbosity)
-    _warn_about_unsupported(supports_class_weights,
-                            "Class", _measures, class_weights, verbosity)
+    _warn_about_unsupported(
+        StatisticalMeasuresBase.supports_weights,
+        "Sample",
+        _measures,
+        weights,
+        verbosity,
+    )
+    _warn_about_unsupported(
+        StatisticalMeasuresBase.supports_class_weights,
+        "Class",
+        _measures,
+        class_weights,
+        verbosity,
+    )
 
     _acceleration= _process_accel_settings(acceleration)
 
-    evaluate!(mach, resampling, weights, class_weights, rows, verbosity,
-              repeats, _measures, _operations, _acceleration, force, logger,
-              resampling)
-
+    evaluate!(
+        mach,
+        resampling,
+        weights,
+        class_weights,
+        rows,
+        verbosity,
+        repeats,
+        _measures,
+        _operations,
+        _acceleration,
+        force,
+        per_observation,
+        logger,
+        resampling,
+    )
 end
 
 """
-    evaluate(model, data...; cache=true, kw_options...)
+    evaluate(model, data...; cache=true, options...)
 
 Equivalent to `evaluate!(machine(model, data..., cache=cache);
-wk_options...)`.  See the machine version `evaluate!` for the complete
+options...)`.  See the machine version `evaluate!` for the complete
 list of options.
 
+Returns a  [`PerformanceEvaluation`](@ref) object.
+
+See also [`evaluate!`](@ref).
+
 """
 evaluate(model::Measurable, args...; cache=true, kwargs...) =
     evaluate!(machine(model, args...; cache=cache); kwargs...)
@@ -1173,30 +1186,32 @@ const AbstractRow = Union{AbstractVector{<:Integer}, Colon}
 const TrainTestPair = Tuple{AbstractRow, AbstractRow}
 const TrainTestPairs = AbstractVector{<:TrainTestPair}
 
-# helper:
-_feature_dependencies_exist(measures) =
-    !all(m->!(is_feature_dependent(m)), measures)
-
-# helper:
-function measure_specific_weights(measure, weights, class_weights, test)
-    supports_weights(measure) && supports_class_weights(measure) &&
-        error("Encountered a measure that simultaneously supports "*
-              "(per-sample) weights and class weights. ")
-    if supports_weights(measure)
-        weights === nothing && return nothing
-        return weights[test]
-    end
-    supports_class_weights(measure) && return class_weights
-    return nothing
-end
+_view(::Nothing, rows) = nothing
+_view(weights, rows) = view(weights, rows)
 
 # Evaluation when `resampling` is a TrainTestPairs (CORE EVALUATOR):
-# `user_resampling` keyword argument is the user defined resampling strategy
-function evaluate!(mach::Machine, resampling, weights, class_weights, rows,
-                   verbosity, repeats, measures, operations, acceleration,
-                   force, logger, user_resampling)
+function evaluate!(
+    mach::Machine,
+    resampling,
+    weights,
+    class_weights,
+    rows,
+    verbosity,
+    repeats,
+    measures,
+    operations,
+    acceleration,
+    force,
+    per_observation_flag,
+    logger,
+    user_resampling,
+    )
+
+    # Note: the `user_resampling` argument is the user-defined resampling strategy,
+    # while `resampling` is always a `TrainTestPairs`.
 
-    # Note: `rows` and `repeats` are ignored here
+    # Note: `rows` and `repeats` are only passed to the final `PerformanceEvaluation`
+    # object to be returned and are not otherwise used here.
 
     if !(resampling isa TrainTestPairs)
         error("`resampling` must be an "*
@@ -1206,12 +1221,21 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows,
 
     X = mach.args[1]()
     y = mach.args[2]()
+    nrows = MLJBase.nrows(y)
 
     nfolds = length(resampling)
+    test_fold_sizes = map(resampling) do train_test_pair
+        test = last(train_test_pair)
+        test isa Colon && (return nrows)
+        length(test)
+    end
 
-    nmeasures = length(measures)
+    # weights used to aggregate per-fold measurements, which depend on a measure's
+    # external mode of aggregation:
+    fold_weights(mode) = nfolds .* test_fold_sizes ./ sum(test_fold_sizes)
+    fold_weights(::StatisticalMeasuresBase.Sum) = nothing
 
-    feature_dependencies_exist = _feature_dependencies_exist(measures)
+    nmeasures = length(measures)
 
     function fit_and_extract_on_fold(mach, k)
         train, test = resampling[k]
@@ -1220,21 +1244,27 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows,
         # that appear (`predict`, `predict_mode`, etc):
         yhat_given_operation =
             Dict(op=>op(mach, rows=test) for op in unique(operations))
-        if feature_dependencies_exist
-            Xtest = selectrows(X, test)
-        else
-            Xtest = nothing
-        end
-        ytest = selectrows(y, test)
 
-        measurements =  map(measures, operations) do m, op
-            wtest = measure_specific_weights(
-                m,
-                weights,
-                class_weights,
-                test
-            )
-            value(m, yhat_given_operation[op], Xtest, ytest, wtest)
+        ytest = selectrows(y, test)
+        if per_observation_flag
+            measurements =  map(measures, operations) do m, op
+                StatisticalMeasuresBase.measurements(
+                    m,
+                    yhat_given_operation[op],
+                    ytest,
+                    _view(weights, test),
+                    class_weights,
+                )
+            end
+        else
+            measurements =  map(measures, operations) do m, op
+                m(
+                    yhat_given_operation[op],
+                    ytest,
+                    _view(weights, test),
+                    class_weights,
+                )
+            end
         end
 
         fp = fitted_params(mach)
@@ -1267,27 +1297,38 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows,
 
     measurements_flat = vcat(measurements_vector_of_vectors...)
 
-    # in the following rows=folds, columns=measures:
+    # In the `measurements_matrix` below, rows=folds, columns=measures; each element of
+    # the matrix is:
+    #
+    # - a vector of measurements, one per observation within a fold, if
+    #   `per_observation_flag = true`; or
+    #
+    # - a single measurement for the whole fold, if `per_observation_flag = false`.
+    #
     measurements_matrix = permutedims(
         reshape(collect(measurements_flat), (nmeasures, nfolds))
     )
 
     # measurements for each observation:
-    per_observation = map(1:nmeasures) do k
-        m = measures[k]
-        if reports_each_observation(m)
-            measurements_matrix[:,k]
-        else
-            missing
-        end
+    per_observation = if per_observation_flag
+       map(1:nmeasures) do k
+           measurements_matrix[:,k]
+       end
+    else
+        fill(missing, nmeasures)
     end
 
     # measurements for each fold:
-    per_fold = map(1:nmeasures) do k
-        m = measures[k]
-        if reports_each_observation(m)
-            broadcast(MLJBase.aggregate, per_observation[k], [m,])
-        else
+    per_fold = if per_observation_flag
+        map(1:nmeasures) do k
+            m = measures[k]
+            mode = StatisticalMeasuresBase.external_aggregation_mode(m)
+            map(per_observation[k]) do v
+                StatisticalMeasuresBase.aggregate(v; mode)
+            end
+        end
+    else
+        map(1:nmeasures) do k
             measurements_matrix[:,k]
         end
     end
@@ -1295,7 +1336,12 @@ function evaluate!(mach::Machine, resampling, weights, class_weights, rows,
     # overall aggregates:
     per_measure = map(1:nmeasures) do k
         m = measures[k]
-        MLJBase.aggregate(per_fold[k], m)
+        mode = StatisticalMeasuresBase.external_aggregation_mode(m)
+        StatisticalMeasuresBase.aggregate(
+            per_fold[k];
+            mode,
+            weights=fold_weights(mode),
+        )
     end
 
     evaluation = PerformanceEvaluation(
@@ -1358,39 +1404,36 @@ end
         repeats = 1,
         acceleration=default_resource(),
         check_measure=true,
-        logger=nothing
+        per_observation=true,
+        logger=nothing,
     )
 
-Resampling model wrapper, used internally by the `fit` method of
-`TunedModel` instances and `IteratedModel` instances. See
-[`evaluate!](@ref) for options. Not intended for general use.
+Resampling model wrapper, used internally by the `fit` method of `TunedModel` instances
+and `IteratedModel` instances. See [`evaluate!`](@ref) for options. Not intended for use by
+the general user, who will ordinarily use [`evaluate!`](@ref) directly.
 
-Given a machine `mach = machine(resampler, args...)` one obtains a
-performance evaluation of the specified `model`, performed according
-to the prescribed `resampling` strategy and other parameters, using
-data `args...`, by calling `fit!(mach)` followed by
+Given a machine `mach = machine(resampler, args...)` one obtains a performance evaluation
+of the specified `model`, performed according to the prescribed `resampling` strategy and
+other parameters, using data `args...`, by calling `fit!(mach)` followed by
 `evaluate(mach)`.
 
-On subsequent calls to `fit!(mach)` new train/test pairs of row
-indices are only regenerated if `resampling`, `repeats` or `cache`
-fields of `resampler` have changed. The evolution of an RNG field of
-`resampler` does *not* constitute a change (`==` for `MLJType` objects
-is not sensitive to such changes; see [`is_same_except'](@ref)).
+On subsequent calls to `fit!(mach)` new train/test pairs of row indices are only
+regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The
+evolution of an RNG field of `resampler` does *not* constitute a change (`==` for
+`MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)).
 
-If there is single train/test pair, then warm-restart behavior of the
-wrapped model `resampler.model` will extend to warm-restart behaviour
-of the wrapper `resampler`, with respect to mutations of the wrapped
-model.
+If there is single train/test pair, then warm-restart behavior of the wrapped model
+`resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with
+respect to mutations of the wrapped model.
 
-The sample `weights` are passed to the specified performance measures
-that support weights for evaluation. These weights are not to be
-confused with any weights bound to a `Resampler` instance in a
-machine, used for training the wrapped `model` when supported.
+The sample `weights` are passed to the specified performance measures that support weights
+for evaluation. These weights are not to be confused with any weights bound to a
+`Resampler` instance in a machine, used for training the wrapped `model` when supported.
 
-The sample `class_weights` are passed to the specified performance
-measures that support per-class weights for evaluation. These weights
-are not to be confused with any weights bound to a `Resampler` instance
-in a machine, used for training the wrapped `model` when supported.
+The sample `class_weights` are passed to the specified performance measures that support
+per-class weights for evaluation. These weights are not to be confused with any weights
+bound to a `Resampler` instance in a machine, used for training the wrapped `model` when
+supported.
 
 """
 mutable struct Resampler{S, L} <: Model
@@ -1404,6 +1447,7 @@ mutable struct Resampler{S, L} <: Model
     check_measure::Bool
     repeats::Int
     cache::Bool
+    per_observation::Bool
     logger::L
 end
 
@@ -1433,18 +1477,21 @@ function MLJModelInterface.clean!(resampler::Resampler)
     return warning
 end
 
-function Resampler(;
-    model=nothing,
+function Resampler(
+    ;model=nothing,
     resampling=CV(),
-    measure=nothing,
+    measures=nothing,
+    measure=measures,
     weights=nothing,
     class_weights=nothing,
-    operation=predict,
+    operations=predict,
+    operation=operations,
     acceleration=default_resource(),
     check_measure=true,
     repeats=1,
     cache=true,
-    logger=nothing
+    per_observation=true,
+    logger=nothing,
 )
     resampler = Resampler(
         model,
@@ -1457,7 +1504,8 @@ function Resampler(;
         check_measure,
         repeats,
         cache,
-        logger
+        per_observation,
+        logger,
     )
     message = MLJModelInterface.clean!(resampler)
     isempty(message) || @warn message
@@ -1503,8 +1551,9 @@ function MLJModelInterface.fit(resampler::Resampler, verbosity::Int, args...)
         _operations,
         _acceleration,
         false,
+        resampler.per_observation,
         resampler.logger,
-        resampler.resampling
+        resampler.resampling,
     )
 
     fitresult = (machine = mach, evaluation = e)
@@ -1568,8 +1617,9 @@ function MLJModelInterface.update(
         operations,
         acceleration,
         false,
+        resampler.per_observation,
         resampler.logger,
-        resampler.resampling
+        resampler.resampling,
     )
     report = (evaluation = e, )
     fitresult = (machine=mach2, evaluation=e)
diff --git a/src/utilities.jl b/src/utilities.jl
index 66dd62b7..969fce4c 100644
--- a/src/utilities.jl
+++ b/src/utilities.jl
@@ -135,20 +135,6 @@ function recursive_setproperty!(obj, ex::Expr, value)
     return recursive_setproperty!(last_obj, field, value)
 end
 
-"""
-    check_dimensions(X, Y)
-
-Internal function to check two arrays have the same shape.
-
-"""
-@inline function check_dimensions(X, Y)
-    size(X)  == size(Y) ||
-        throw(DimensionMismatch(
-            "Encountered two objects with sizes $(size(X)) and "*
-            "$(size(Y)) which needed to match but don't. "))
-    return nothing
-end
-
 """
     check_same_nrows(X, Y)
 
@@ -469,3 +455,93 @@ end
 
 generate_name!(model, existing_names; kwargs...) =
     generate_name!(typeof(model), existing_names; kwargs...)
+
+
+# # OBSERVATION VS CONTAINER HACKING TOOLS
+
+# The following tools are used to bridge the gap between old paradigm of prescribing
+# the scitype of containers of observations, and the LearnAPI.jl paradigm of prescribing
+# only the scitype of the observations themselves. This is needed because measures are
+# now taken from StatisticalMeasures.jl which follows the LearnAPI.jl paradigm, but model
+# `target_scitype` refers to containers.
+
+"""
+    observation(S)
+
+*Private method.*
+
+Tries to infer the per-observation scitype from the scitype of `S`, when `S` is known to
+be the scitype of some container with multiple observations; here we view the scitype for
+one row of a table to be the scitype of the row converted to a vector. Return `Unknown` if
+unable to draw a reliable inference.
+
+
+The observation scitype for a table is here understood as the scitype of a row converted
+to a vector.
+
+"""
+observation(::Type) = Unknown
+observation(::Type{AbstractVector{S}}) where S = S
+observation(::Type{AbstractArray{S,N}}) where {S,N} = AbstractArray{S,N-1}
+for T in [:Continuous, :Count, :Finite, :Infinite, :Multiclass, :OrderedFactor]
+    TM = "Union{Missing,$T}" |> Meta.parse
+    for S in [T, TM]
+        quote
+            observation(::Type{AbstractVector{<:$S}}) = $S
+            observation(::Type{AbstractArray{<:$S,N}}) where N = AbstractArray{<:$S,N-1}
+            observation(::Type{Table{<:AbstractVector{<:$S}}}) = AbstractVector{<:$S}
+        end |> eval
+    end
+end
+# note that in Julia `f(::Type{AbstractVector{<:T}}) where T = T` has not a well-formed
+# left-hand side (the phrase "has not" above means "does not have")
+
+"""
+    guess_observation_scitype(y)
+
+*Private method.*
+
+If `y` is an `AbstractArray`, return the scitype of `y[:, :, ..., :, 1]`. If `y` is a
+table, return the scitype of the first row, converted to a vector, unless this row has
+`missing` elements, in which case return `Unknown`.
+
+In all other cases, `Unknown`.
+
+```
+julia> guess_observation_scitype([missing, 1, 2, 3])
+Union{Missing, Count}
+
+julia> guess_observation_scitype(rand(3, 2))
+AbstractVector{Continuous}
+
+julia> guess_observation_scitype((x=rand(3), y=rand(Bool, 3)))
+AbstractVector{Union{Continuous, Count}}
+
+julia> guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3]))
+Unknown
+```
+"""
+guess_observation_scitype(y) = guess_observation_scitype(y, Val(Tables.istable(y)))
+guess_observation_scitype(y, ::Any) = Unknown
+guess_observation_scitype(y::AbstractArray, ::Val{false}) = observation(scitype(y))
+function guess_observation_scitype(table, ::Val{true})
+    row = Tables.subset(table, 1, viewhint=false) |> collect
+    E = eltype(row)
+    nonmissingtype(E) == E || return Unknown
+    scitype(row)
+end
+
+"""
+    guess_model_target_observation_scitype(model)
+
+*Private method*
+
+Try to infer a lowest upper bound on the scitype of target observations acceptable to
+`model`, by inspecting `target_scitype(model)`. Return `Unknown` if unable to draw reliable
+inference.
+
+The observation scitype for a table is here understood as the scitype of a row converted
+to a vector.
+
+"""
+guess_model_target_observation_scitype(model) =  observation(target_scitype(model))
diff --git a/test/_models/simple_composite_model.jl b/test/_models/simple_composite_model.jl
index 0ff413cb..09951d49 100644
--- a/test/_models/simple_composite_model.jl
+++ b/test/_models/simple_composite_model.jl
@@ -1,47 +1,38 @@
-export SimpleDeterministicCompositeModel, SimpleDeterministicNetworkCompositeModel,
-    SimpleProbabilisticCompositeModel, SimpleProbabilisticNetworkCompositeModel
+export  SimpleDeterministicNetworkCompositeModel,
+    SimpleProbabilisticNetworkCompositeModel
 
 using MLJBase
 
 const COMPOSITE_MODELS = [
-    :SimpleDeterministicCompositeModel,
-    :SimpleProbabilisticCompositeModel,
     :SimpleDeterministicNetworkCompositeModel,
     :SimpleProbabilisticNetworkCompositeModel
 ]
 const REGRESSORS = Dict(
-    :SimpleDeterministicCompositeModel => :DeterministicConstantRegressor,
     :SimpleDeterministicNetworkCompositeModel => :DeterministicConstantRegressor,
-    :SimpleProbabilisticCompositeModel => :ConstantRegressor,
     :SimpleProbabilisticNetworkCompositeModel => :ConstantRegressor,
 )
 
 const REGRESSOR_SUPERTYPES = Dict(
-    :SimpleDeterministicCompositeModel => :Deterministic,
     :SimpleDeterministicNetworkCompositeModel => :Deterministic,
-    :SimpleProbabilisticCompositeModel => :Probabilistic,
     :SimpleProbabilisticNetworkCompositeModel => :Probabilistic,
 )
 
 const COMPOSITE_SUPERTYPES = Dict(
-    :SimpleDeterministicCompositeModel => :DeterministicComposite,
     :SimpleDeterministicNetworkCompositeModel => :DeterministicNetworkComposite,
-    :SimpleProbabilisticCompositeModel => :ProbabilisticComposite,
     :SimpleProbabilisticNetworkCompositeModel => :ProbabilisticNetworkComposite,
 )
 
-
 for model in COMPOSITE_MODELS
     regressor = REGRESSORS[model]
     regressor_supertype = REGRESSOR_SUPERTYPES[model]
     composite_supertype = COMPOSITE_SUPERTYPES[model]
-    quote 
+    quote
         """
             (model)(; regressor=$($(regressor))(), transformer=FeatureSelector())
 
         Construct a composite model consisting of a transformer
-        (`Unsupervised` model) followed by a `$($(regressor_supertype))` model. Mainly
-        intended for internal testing .
+        (`Unsupervised` model) followed by a `$($(regressor_supertype))` model.
+        Intended for internal testing only.
 
         """
         mutable struct $(model){
@@ -67,36 +58,18 @@ for model in COMPOSITE_MODELS
             is_pure_julia = true,
             is_wrapper = true
         )
-        
+
         MLJBase.input_scitype(::Type{<:$(model){L,T}}) where {L,T} =
             MLJBase.input_scitype(T)
         MLJBase.target_scitype(::Type{<:$(model){L,T}}) where {L,T} =
             MLJBase.target_scitype(L)
-        
+
     end |> eval
 end
 
 ## FIT METHODS
-for model in COMPOSITE_MODELS[1:2]
-    @eval function MLJBase.fit(
-        composite::$(model), verbosity::Integer, Xtrain, ytrain
-    )
-        X = source(Xtrain) # instantiates a source node
-        y = source(ytrain)
-
-        t = machine(composite.transformer, X)
-        Xt = transform(t, X)
 
-        l = machine(composite.model, Xt, y)
-        yhat = predict(l, Xt)
-
-        mach = machine($(REGRESSOR_SUPERTYPES[model])(), X, y; predict=yhat)
-
-        return!(mach, composite, verbosity)
-    end
-end
-
-for model in COMPOSITE_MODELS[3:4]
+for model in COMPOSITE_MODELS
     @eval function MLJBase.prefit(
         composite::$(model),
         verbosity::Integer,
diff --git a/test/composition/learning_networks/deprecated_machines.jl b/test/composition/learning_networks/deprecated_machines.jl
deleted file mode 100644
index 19b580d6..00000000
--- a/test/composition/learning_networks/deprecated_machines.jl
+++ /dev/null
@@ -1,167 +0,0 @@
-module TestLearningNetworkMachines
-
-const depwarn=false
-
-using Test
-using ..Models
-using ..TestUtilities
-using MLJBase
-using Tables
-using StableRNGs
-using Serialization
-rng = StableRNG(616161)
-
-# A dummy clustering model:
-mutable struct DummyClusterer <: Unsupervised
-    n::Int
-end
-DummyClusterer(; n=3) = DummyClusterer(n)
-function MLJBase.fit(model::DummyClusterer, verbosity::Int, X)
-    Xmatrix = Tables.matrix(X)
-    n = min(size(Xmatrix, 2), model.n)
-    centres = Xmatrix[1:n, :]
-    levels = categorical(1:n)
-    report = (centres=centres,)
-    fitresult = levels
-    return fitresult, nothing, report
-end
-MLJBase.transform(model::DummyClusterer, fitresult, Xnew) =
-    selectcols(Xnew, 1:length(fitresult))
-MLJBase.predict(model::DummyClusterer, fitresult, Xnew) =
-    [fill(fitresult[1], nrows(Xnew))...]
-
-
-N = 20
-X = (a = rand(N), b = categorical(rand("FM", N)))
-
-@testset "signature helpers" begin
-    @test MLJBase._call(NamedTuple()) == NamedTuple()
-    a = source(:a)
-    b = source(:b)
-    W = source(:W)
-    yhat = source(:yhat)
-    s = (transform=W,
-         report=(a=a, b=b),
-         predict=yhat)
-    @test MLJBase._report_part(s) == (a=a, b=b)
-    @test MLJBase._operation_part(s) == (transform=W, predict=yhat)
-    @test MLJBase._nodes(s) == (W, yhat, a, b)
-    @test MLJBase._operations(s) == (:transform, :predict)
-    R = MLJBase._call(MLJBase._report_part(s))
-    @test R.a == :a
-    @test R.b == :b
-end
-
-@testset "wrapping a learning network in a machine" begin
-
-    # unsupervised:
-    Xs = source(X)
-    W = transform(machine(OneHotEncoder(), Xs), Xs)
-    clust = DummyClusterer(n=2)
-    m = machine(clust, W)
-    yhat = predict(m, W)
-    Wout = transform(m, W)
-    rnode = source(:stuff)
-
-    # test of `fitted_params(::NamedTuple)':
-    fit!(Wout, verbosity=0)
-
-    @test_throws(MLJBase.ERR_BAD_SIGNATURE,
-                 machine(Unsupervised();
-                         predict=yhat,
-                         fitted_params=rnode,
-                         depwarn)
-                 )
-    @test_throws(MLJBase.ERR_EXPECTED_NODE_IN_SIGNATURE,
-                 machine(Unsupervised();
-                         predict=42,
-                         depwarn)
-                 )
-    @test_throws(MLJBase.ERR_EXPECTED_NODE_IN_SIGNATURE,
-                 machine(Unsupervised(), Xs;
-                         predict=yhat,
-                         transform=Wout,
-                         report=(some_stuff=42,),
-                         depwarn)
-                 )
-    mach = machine(Unsupervised(), Xs;
-                   predict=yhat,
-                   transform=Wout,
-                   report=(some_stuff=rnode,),
-                   depwarn)
-    @test mach.args == (Xs, )
-    @test mach.args[1] == Xs
-    fit!(mach, force=true, verbosity=0)
-    Θ = mach.fitresult
-    @test Θ.predict == yhat
-    @test Θ.transform == Wout
-    Θ.report.some_stuff == rnode
-    @test report(mach).some_stuff == :stuff
-    @test report(mach).machines == fitted_params(mach).machines
-
-    # supervised
-    y = rand("ab", N) |> categorical;
-    ys = source(y)
-    mm = machine(ConstantClassifier(), W, ys)
-    yhat = predict(mm, W)
-    e = @node auc(yhat, ys)
-
-    @test_throws Exception machine(; predict=yhat, depwarn)
-    mach = machine(Probabilistic(), Xs, ys;
-                   predict=yhat,
-                   report=(training_auc=e,),
-                   depwarn)
-    @test mach.model isa Probabilistic
-    @test_throws ArgumentError machine(Probabilistic(), Xs, ys; depwarn)
-    @test_throws ArgumentError machine(Probabilistic(), Xs, ys;
-                                       report=(training_auc=e,),
-                                       depwarn)
-
-    # test extra report items coming from `training_auc=e` above
-    fit!(mach, verbosity=0)
-    err = auc(yhat(), y)
-    @test report(mach).training_auc ≈ err
-
-    # supervised - predict_mode
-    @test predict_mode(mach, X) == mode.(predict(mach, X))
-    predict_mode(mach, rows=1:2) == predict_mode(mach, rows=:)[1:2]
-
-    # evaluate a learning machine
-    evaluate!(mach, measure=LogLoss(), verbosity=0)
-
-    # supervised - predict_median, predict_mean
-    X1, y1 = make_regression(20)
-
-    Xs = source(X1); ys = source(y1)
-    mm = machine(ConstantRegressor(), Xs, ys)
-    yhat = predict(mm, Xs)
-    mach = fit!(machine(Probabilistic(), Xs, ys; predict=yhat, depwarn), verbosity=0)
-    @test predict_mean(mach, X1) ≈ mean.(predict(mach, X1))
-    @test predict_median(mach, X1) ≈ median.(predict(mach, X1))
-
-end
-
-mutable struct DummyComposite <: DeterministicComposite
-    stand1
-    stand2
-end
-
-@testset "issue 377" begin
-    stand = Standardizer()
-    model = DummyComposite(stand, stand)
-
-    Xs = source()
-    mach1 = machine(model.stand1, Xs)
-    X1 = transform(mach1, Xs)
-    mach2 = machine(model.stand2, X1)
-    X2 = transform(mach2, X1)
-
-    mach = machine(Unsupervised(), Xs; transform=X2, depwarn)
-    @test_logs((:error, r"The hyper"),
-               @test_throws(ArgumentError,
-                            MLJBase.network_model_names(model, mach)))
-end
-
-end
-
-true
diff --git a/test/composition/learning_networks/nodes.jl b/test/composition/learning_networks/nodes.jl
index 1f175d45..e79cec9d 100644
--- a/test/composition/learning_networks/nodes.jl
+++ b/test/composition/learning_networks/nodes.jl
@@ -6,6 +6,7 @@ using MLJBase
 using ..Models
 using ..TestUtilities
 using CategoricalArrays
+using StatisticalMeasures
 import Random.seed!
 seed!(1234)
 
diff --git a/test/composition/learning_networks/replace.jl b/test/composition/learning_networks/replace.jl
index fab3f16c..6186bf9c 100644
--- a/test/composition/learning_networks/replace.jl
+++ b/test/composition/learning_networks/replace.jl
@@ -33,8 +33,6 @@ zhat = inverse_transform(standM, uhat)
 yhat = exp(zhat)
 enode = @node mae(ys, yhat)
 
-_header(accel) =
-
 @testset "replace()  method; $(typeof(accel))" for accel in (CPU1(), CPUThreads())
 
     fit!(yhat, verbosity=0, acceleration=accel)
@@ -50,15 +48,12 @@ _header(accel) =
     knn2 = deepcopy(knn)
 
     # duplicate the network with `yhat` as glb:
-    yhat_clone = @test_logs(
-        (:warn, r"No replacement"),
-        replace(
-            yhat,
-            hot=>hot2,
-            knn=>knn2,
-            ys=>source(42);
-            copy_models_deeply=false,
-        ),
+    yhat_clone = replace(
+        yhat,
+        hot=>hot2,
+        knn=>knn2,
+        ys=>source(42);
+        copy_unspecified_deeply=false,
     )
 
     # test models and sources duplicated correctly:
@@ -79,16 +74,13 @@ _header(accel) =
     @test all(isempty, sources(yhat_ser))
 
     # duplicate a signature:
-    signature = (predict=yhat, report=(mae=enode,)) |> MLJBase.signature
-    signature_clone = @test_logs(
-        (:warn, r"No replacement"),
-        replace(
-            signature,
-            hot=>hot2,
-            knn=>knn2,
-            ys=>source(42);
-            copy_models_deeply=false,
-        )
+    signature = (predict=yhat, report=(mae=enode,)) |> MLJBase.Signature
+    signature_clone = replace(
+        signature,
+        hot=>hot2,
+        knn=>knn2,
+        ys=>source(2*y);
+        copy_unspecified_deeply=false,
     )
     glb_node = glb(signature_clone)
     models_clone = MLJBase.models(glb_node)
@@ -97,28 +89,20 @@ _header(accel) =
     @test models_clone[3] === hot2
     sources_clone = sources(glb_node)
     @test sources_clone[1]() == X
-    @test sources_clone[2]() === 42
+    @test sources_clone[2]() == 2*y
+
+    # expect a "No replacement" warning when `copy_unspecified_deeply=false` is omitted:
+    @test_logs(
+        (:warn, r"No replacement"),
+        replace(
+            signature,
+            hot=>hot2,
+            knn=>knn2,
+            ys=>source(2*y);
+        ),
+    )
 
-    # duplicate a learning network machine:
-    mach  = machine(Deterministic(), Xs, ys;
-                    predict=yhat,
-                    report=(mae=enode,))
-    mach2 = replace(mach, hot=>hot2, knn=>knn2,
-                    ys=>source(ys.data);
-                    empty_unspecified_sources=true)
-    ss = sources(glb(mach2))
-    @test isempty(ss[1])
-    mach2 = @test_logs((:warn, r"No replacement"),
-                       replace(mach, hot=>hot2, knn=>knn2,
-                               ys=>source(ys.data)))
-    yhat2 = mach2.fitresult.predict
-    fit!(mach, verbosity=0)
-    fit!(mach2, verbosity=0)
-    @test predict(mach, X) ≈ predict(mach2, X)
-    @test report(mach).mae ≈ report(mach2).mae
-
-    @test mach2.args[1]() == Xs()
-    @test mach2.args[2]() == ys()
+    yhat2 = MLJBase.operation_nodes(signature_clone).predict
 
 
     ## EXTRA TESTS FOR TRAINING SEQUENCE
@@ -141,9 +125,7 @@ _header(accel) =
 
     @test length(MLJBase.machines(yhat)) == length(MLJBase.machines(yhat2))
     @test MLJBase.models(yhat) == MLJBase.models(yhat2)
-    @test sources(yhat) == sources(yhat2)
-    @test MLJBase.tree(yhat) == MLJBase.tree(yhat2)
-    @test yhat() ≈ yhat2()
+    @test 2yhat() ≈ yhat2()
 
     # this change should trigger retraining of all machines except the
     # univariate standardizer:
@@ -159,7 +141,6 @@ _header(accel) =
                          (:train, oakM2), (:train, knnM2)])
 end
 
-
 end # module
 
 true
diff --git a/test/composition/learning_networks/signatures.jl b/test/composition/learning_networks/signatures.jl
index 08785b40..019a9cd5 100644
--- a/test/composition/learning_networks/signatures.jl
+++ b/test/composition/learning_networks/signatures.jl
@@ -7,6 +7,7 @@ using Tables
 using Test
 using MLJModelInterface
 using OrderedCollections
+using StatisticalMeasures
 
 @testset "signatures - accessor functions" begin
     a = source(:a)
diff --git a/test/composition/models/deprecated_from_network.jl b/test/composition/models/deprecated_from_network.jl
deleted file mode 100644
index 15b56d03..00000000
--- a/test/composition/models/deprecated_from_network.jl
+++ /dev/null
@@ -1,621 +0,0 @@
-module TestFromComposite
-
-using Test
-using Tables
-using MLJBase
-using ..Models
-using ..TestUtilities
-using CategoricalArrays
-using StableRNGs
-using Parameters
-rng = StableRNG(616161)
-
-ridge_model = FooBarRegressor(lambda=0.1)
-selector_model = FeatureSelector()
-
-import MLJBase.@nodepwarn_from_network
-const depwarn = false
-
-## FROM_NETWORK_PREPROCESS
-
-# supervised:
-Xs = source(nothing)
-ys = source(nothing)
-z  = log(ys)
-stand = UnivariateStandardizer()
-standM = machine(stand, z)
-u = transform(standM, z)
-hot = OneHotEncoder()
-hotM = machine(hot, Xs)
-W = transform(hotM, Xs)
-knn = KNNRegressor()
-knnM = machine(knn, W, u)
-oak = DecisionTreeRegressor()
-oakM = machine(oak, W, u)
-uhat = 0.5*(predict(knnM, W) + predict(oakM, W))
-zhat = inverse_transform(standM, uhat)
-yhat = exp(zhat)
-
-mach_ex = :(machine(Deterministic(), Xs, ys; predict=yhat, depwarn=false))
-
-## TESTING `from_network_preprocess`
-
-ex = Meta.parse(
-    "begin
-         mutable struct CompositeX
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-         target_scitype=AbstractVector{<:Continuous}
-         input_scitype=Table(Continuous,Multiclass)
-     end")
-mach_, modeltype_ex, struct_ex, no_fields, dic =
-    MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)
-
-eval(Parameters.with_kw(struct_ex, TestFromComposite, false))
-@test supertype(CompositeX) == DeterministicComposite
-composite = CompositeX()
-@test composite.knn_rgs == knn
-@test composite.one_hot_enc == hot
-@test dic[:target_scitype] == :(AbstractVector{<:Continuous})
-@test dic[:input_scitype] == :(Table(Continuous, Multiclass))
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite4 <: ProbabilisticComposite
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-     end")
-mach_, modeltype_ex, struct_ex =
-    MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)
-eval(Parameters.with_kw(struct_ex, TestFromComposite, false))
-@test supertype(Composite4) == ProbabilisticComposite
-
-ex = Meta.parse(
-    "mutable struct Composite2
-        knn_rgs=knn
-        one_hot_enc=hot
-     end")
-mach_, modeltype_ex, struct_ex, no_fields, dic =
-    MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)
-eval(Parameters.with_kw(struct_ex, TestFromComposite, false))
-composite = Composite2()
-@test composite.knn_rgs == knn
-@test composite.one_hot_enc == hot
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite6 <: Probabilistic
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-     end")
-@test_logs((:warn, r"New composite"),
-           MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex))
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite20
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-         target_scitype == Continuous
-     end")
-@test_throws(ArgumentError,
-             MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex))
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite20
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-         Continuous
-     end")
-@test_throws(ArgumentError,
-             MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex))
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite20
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-         43 = Continuous
-     end")
-@test_throws(ArgumentError,
-             MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex))
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite7 < Probabilistic
-             knn_rgs=knn
-             one_hot_enc=hot
-         end
-     end")
-@test_throws(ArgumentError,
-           MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex))
-
-@test_throws(ArgumentError,
-             MLJBase.from_network_preprocess(TestFromComposite, knn, ex))
-
-ex = Meta.parse(
-    "begin
-         Composite3(
-             knn_rgs=knn,
-             one_hot_enc=hot)
-     end")
-@test_throws(ArgumentError,
-             MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex))
-
-ex = Meta.parse(
-    "begin
-         mutable struct Composite8
-             knn_rgs::KNNRegressor=knn
-             one_hot_enc=hot
-         end
-     end")
-mach_, modeltype_ex, struct_ex =
-    MLJBase.from_network_preprocess(TestFromComposite, mach_ex, ex)
-eval(Parameters.with_kw(struct_ex, TestFromComposite, false))
-VERSION ≥ v"1.3.0-" &&
-    @test fieldtypes(Composite8) == (KNNRegressor, Any)
-
-# test that you cannot leave "default" component models unspecified:
-modeltype_ex = :Composite9
-struct_ex = :(mutable struct Composite9 <: DeterministicComposite
-              knn_rgs::KNNRegressor
-              one_hot_enc = hot
-              end)
-@test_logs (:error, r"Problem instantiating") begin
-    @test_throws Exception begin
-        MLJBase.from_network_(TestFromComposite,
-                              mach_ex, modeltype_ex,
-                              struct_ex, false, Dict{Symbol,Any}())
-    end
-end
-
-
-## TEST MACRO-EXPORTED  NETWORKS
-# (CANNOT WRAP IN @testset)
-
-# some actual data:
-N = 10
-X = MLJBase.table(rand(N, 3))
-y = rand(N)
-w = rand(N)
-
-# supervised with sample weights:
-ws = source()
-knnM = machine(knn, W, u, ws)
-uhat = 0.5*(predict(knnM, W) + predict(oakM, W))
-zhat = inverse_transform(standM, uhat)
-yhat = exp(zhat)
-
-@nodepwarn_from_network machine(
-    Deterministic(), Xs, ys, ws; predict=yhat, depwarn=false
-) begin
-    mutable struct CompositeX1
-        knn_rgs=knn
-        one_hot_enc=hot
-    end
-    supports_weights = true
-    target_scitype = AbstractVector{<:Continuous}
-end
-model = CompositeX1()
-@test supports_weights(model)
-@test target_scitype(model) == AbstractVector{<:Continuous}
-@test_logs((:warn, r""), predict(fit!(machine(model, X, y, w), verbosity=-1), X));
-# unsupervised:
-@nodepwarn_from_network machine(Unsupervised(), Xs; transform=W, depwarn=false) begin
-    mutable struct CompositeX2
-        one_hot_enc=hot
-    end
-end
-model = CompositeX2()
-@test_logs((:warn, r""), transform(fit!(machine(model, X), verbosity=-1), X));
-
-
-# second supervised test:
-fea = FeatureSelector()
-feaM = machine(fea, Xs)
-G = transform(feaM, Xs)
-hotM = machine(hot, G)
-H = transform(hotM, G)
-elm = DecisionTreeClassifier()
-elmM = machine(elm, H, ys)
-yhat = predict(elmM, H)
-
-@nodepwarn_from_network machine(Probabilistic(), Xs, ys; predict=yhat, depwarn=false) begin
-    mutable struct CompositeX3
-        selector=fea
-        one_hot=hot
-        tree=elm
-    end
-end
-model = CompositeX3()
-y = coerce(y, Multiclass)
-@test @test_logs((:warn, r""), predict(fit!(machine(model, X, y), verbosity=-1), X)) isa
-    AbstractVector{<:UnivariateFinite}
-
-# yet more examples:
-x1 = map(n -> mod(n,3), rand(rng, UInt8, 100)) |> categorical;
-x2 = randn(rng, 100);
-X = (x1=x1, x2=x2);
-y = x2.^2;
-
-Xs = source(X)
-ys = source(y)
-z = log(ys)
-stand = UnivariateStandardizer()
-standM = machine(stand, z)
-u = transform(standM, z)
-hot = OneHotEncoder()
-hotM = machine(hot, Xs)
-W = transform(hotM, Xs)
-knn = KNNRegressor()
-knnM = machine(knn, W, u)
-oak = DecisionTreeRegressor()
-oakM = machine(oak, W, u)
-uhat = 0.5*(predict(knnM, W) + predict(oakM, W))
-zhat = inverse_transform(standM, uhat)
-yhat = exp(zhat)
-
-mach = machine(Deterministic(), Xs, ys; predict=yhat, depwarn=false)
-
-@nodepwarn_from_network mach begin
-    mutable struct Composite10
-        knn_rgs::KNNRegressor=knn
-        one_hot_enc=hot
-    end
-end
-
-model_ = Composite10()
-
-mach = machine(model_, X, y)
-
-@test_logs((:warn, r""),
-           @test_model_sequence(fit_only!(mach),
-                                [(:train, model_), (:train, stand), (:train, hot),
-                                 (:train, knn), (:train, oak)],
-                                [(:train, model_), (:train, hot), (:train, stand),
-                                 (:train, knn), (:train, oak)],
-                                [(:train, model_), (:train, stand), (:train, hot),
-                                 (:train, oak), (:train, knn)],
-                                [(:train, model_), (:train, hot), (:train, stand),
-                                 (:train, oak), (:train, knn)])
-           )
-
-model_.knn_rgs.K = 55
-knn = model_.knn_rgs
-@test_model_sequence(fit_only!(mach),
-                     [(:update, model_), (:skip, stand), (:skip, hot),
-                      (:update, knn), (:skip, oak)],
-                     [(:update, model_), (:skip, hot), (:skip, stand),
-                      (:update, knn), (:skip, oak)],
-                     [(:update, model_), (:skip, stand), (:skip, hot),
-                      (:skip, oak), (:update, knn)],
-                     [(:update, model_), (:skip, hot), (:skip, stand),
-                      (:skip, oak), (:update, knn)])
-
-
-@test MLJBase.tree(mach.fitresult.predict).arg1.arg1.arg1.arg1.model.K == 55
-
-multistand = Standardizer()
-multistandM = machine(multistand, W)
-W2 = transform(multistandM, W)
-
-mach = machine(Unsupervised(), Xs; transform=W2, depwarn=false)
-
-@nodepwarn_from_network mach begin
-    mutable struct MyTransformer
-        one_hot=hot
-    end
-end
-
-model_ = MyTransformer()
-
-mach = machine(model_, X)
-@test_logs((:warn, r""),
-           @test_model_sequence fit_only!(mach) [(:train, model_),
-                                                 (:train, hot), (:train, multistand)]
-           )
-model_.one_hot.drop_last=true
-hot = model_.one_hot
-@test_model_sequence fit_only!(mach) [(:update, model_),
-                                      (:update, hot), (:train, multistand)]
-
-# check nested fitted_params:
-FP = MLJBase.fitted_params(mach)
-@test keys(FP) == (:one_hot, :machines, :fitted_params_given_machine)
-@test Set(FP.one_hot.fitresult.all_features) == Set(keys(X))
-
-transform(mach, X);
-
-
-## TEST MACRO-EXPORTED SUPERVISED NETWORK WITH SAMPLE WEIGHTS
-
-rng = StableRNG(56161)
-N = 500
-X = (x = rand(rng, 3N), );
-y = categorical(rand(rng, "abc", 3N));
-# define class weights :a, :b, :c in ration 2:4:1
-w = map(y) do η
-    if η == 'a'
-        return 2
-    elseif η == 'b'
-        return 4
-    else
-        return 1
-    end
-end;
-Xs = source(X)
-ys = source(y)
-ws = source(w)
-
-standM = machine(Standardizer(), Xs)
-W = transform(standM, Xs)
-
-rgs = ConstantClassifier() # supports weights
-rgsM = machine(rgs, W, ys, ws)
-yhat = predict(rgsM, W)
-
-fit!(yhat, verbosity=0)
-fit!(yhat, rows=1:div(N,2), verbosity=0)
-yhat(rows=1:div(N,2));
-
-mach = machine(Probabilistic(), Xs, ys, ws; predict=yhat, depwarn=false)
-
-@nodepwarn_from_network mach begin
-    mutable struct MyComposite
-        regressor=rgs
-    end
-    supports_weights=true
-end
-
-my_composite = MyComposite()
-@test MLJBase.supports_weights(my_composite)
-mach = @test_logs((:warn, r""), fit!(machine(my_composite, X, y), verbosity=0))
-Xnew = selectrows(X, 1:div(N,2))
-predict(mach, Xnew)[1]
-posterior = predict(mach, Xnew)[1]
-
-# "posterior" is roughly uniform:
-@test abs(pdf(posterior, 'b')/(pdf(posterior, 'a'))  - 1) < 0.15
-@test abs(pdf(posterior, 'b')/(pdf(posterior, 'c'))  - 1) < 0.15
-
-# now add weights:
-mach = @test_logs((:warn, r""),
-                  fit!(machine(my_composite, X, y, w), rows=1:div(N,2), verbosity=0)
-                  )
-posterior = predict(mach, Xnew)[1]
-
-# "posterior" is skewed appropriately in weighted case:
-@test abs(pdf(posterior, 'b')/(2*pdf(posterior, 'a'))  - 1) < 0.15
-@test abs(pdf(posterior, 'b')/(4*pdf(posterior, 'c'))  - 1) < 0.19
-
-# composite with no fields:
-mach = machine(Probabilistic(), Xs, ys, ws; predict=yhat, depwarn=false)
-@nodepwarn_from_network mach begin
-    struct CompositeWithNoFields
-    end
-end
-composite_with_no_fields = CompositeWithNoFields()
-mach = @test_logs((:warn, r""), fit!(machine(composite_with_no_fields, X, y), verbosity=0))
-
-
-## EXPORTING A TRANSFORMER WITH PREDICT AND TRANSFORM
-
-# A dummy clustering model:
-mutable struct DummyClusterer <: Unsupervised
-    n::Int
-end
-DummyClusterer(; n=3) = DummyClusterer(n)
-function MLJBase.fit(model::DummyClusterer, verbosity::Int, X)
-    Xmatrix = Tables.matrix(X)
-    n = min(size(Xmatrix, 2), model.n)
-    centres = Xmatrix[1:n, :]
-    levels = categorical(1:n)
-    report = (centres=centres,)
-    fitresult = levels
-    return fitresult, nothing, report
-end
-MLJBase.transform(model::DummyClusterer, fitresult, Xnew) =
-    selectcols(Xnew, 1:length(fitresult))
-MLJBase.predict(model::DummyClusterer, fitresult, Xnew) =
-    [fill(fitresult[1], nrows(Xnew))...]
-
-N = 20
-X = (a = rand(N), b = categorical(rand("FM", N)))
-
-Xs = source(X)
-W = transform(machine(OneHotEncoder(), Xs), Xs)
-clust = DummyClusterer(n=2)
-m = machine(clust, W)
-yhat = predict(m, W)
-Wout = transform(m, W)
-foo = first(yhat)
-mach = machine(Unsupervised(), Xs;
-               predict=yhat,
-               transform=Wout,
-               report=(foo=foo,),
-               depwarn=false)
-
-@nodepwarn_from_network mach begin
-    mutable struct WrappedClusterer
-        clusterer::Unsupervised = clust
-    end
-    input_scitype = Table(Continuous,Multiclass)
-end
-
-model = WrappedClusterer()
-mach = @test_logs((:warn, r""), fit!(machine(model, X), verbosity=0))
-fit!(yhat, verbosity=0)
-@test predict(mach, X) == yhat()
-@test transform(mach, X).a ≈ Wout().a
-rep = report(mach)
-@test rep.foo == yhat() |> first
-
-
-## EXPORTING A STATIC LEARNING NETWORK (NO TRAINING ARGUMENTS)
-
-age = [23, 45, 34, 25, 67]
-X = (age = age,
-     gender = categorical(['m', 'm', 'f', 'm', 'f']))
-
-struct MyStaticTransformer <: Static
-    ftr::Symbol
-end
-
-MLJBase.transform(transf::MyStaticTransformer, verbosity, X) =
-    selectcols(X, transf.ftr)
-
-Xs = source()
-W = transform(machine(MyStaticTransformer(:age)), Xs)
-Z = 2*W
-
-@nodepwarn_from_network machine(Static(), Xs; transform=Z, depwarn=false) begin
-    struct NoTraining
-    end
-end
-
-mach = @test_logs((:warn, r""), fit!(machine(NoTraining()), verbosity=0))
-@test transform(mach, X) == 2*X.age
-
-
-## TESTINGS A STACK AND IN PARTICULAR FITTED_PARAMS
-
-folds(data, nfolds) =
-    partition(1:nrows(data), (1/nfolds for i in 1:(nfolds-1))...);
-
-model1 = RidgeRegressor()
-model2 = KNNRegressor(K=1)
-judge = KNNRegressor(K=1)
-
-X = source()
-y = source()
-
-folds(X::AbstractNode, nfolds) = node(XX->folds(XX, nfolds), X)
-MLJBase.restrict(X::AbstractNode, f::AbstractNode, i) =
-    node((XX, ff) -> restrict(XX, ff, i), X, f);
-MLJBase.corestrict(X::AbstractNode, f::AbstractNode, i) =
-    node((XX, ff) -> corestrict(XX, ff, i), X, f);
-
-f = folds(X, 3)
-
-m11 = machine(model1, corestrict(X, f, 1), corestrict(y, f, 1))
-m12 = machine(model1, corestrict(X, f, 2), corestrict(y, f, 2))
-m13 = machine(model1, corestrict(X, f, 3), corestrict(y, f, 3))
-
-y11 = predict(m11, restrict(X, f, 1));
-y12 = predict(m12, restrict(X, f, 2));
-y13 = predict(m13, restrict(X, f, 3));
-
-m21 = machine(model2, corestrict(X, f, 1), corestrict(y, f, 1))
-m22 = machine(model2, corestrict(X, f, 2), corestrict(y, f, 2))
-m23 = machine(model2, corestrict(X, f, 3), corestrict(y, f, 3))
-
-y21 = predict(m21, restrict(X, f, 1));
-y22 = predict(m22, restrict(X, f, 2));
-y23 = predict(m23, restrict(X, f, 3));
-
-y1_oos = vcat(y11, y12, y13);
-y2_oos = vcat(y21, y22, y23);
-
-X_oos = MLJBase.table(hcat(y1_oos, y2_oos))
-
-m_judge = machine(judge, X_oos, y)
-
-m1 = machine(model1, X, y)
-m2 = machine(model2, X, y)
-
-y1 = predict(m1, X);
-y2 = predict(m2, X);
-
-X_judge = MLJBase.table(hcat(y1, y2))
-yhat = predict(m_judge, X_judge)
-
-@nodepwarn_from_network machine(Deterministic(), X, y; predict=yhat, depwarn=false) begin
-    mutable struct MyStack
-        regressor1=model1
-        regressor2=model2
-        judge=judge
-    end
-end
-
-my_stack = MyStack()
-X, y = make_regression(18, 2)
-mach = machine(my_stack, X, y)
-@test_logs((:warn, r""), fit!(mach, verbosity=0))
-
-fp = fitted_params(mach)
-@test keys(fp.judge) == (:tree,)
-@test length(fp.regressor1) == 4
-@test length(fp.regressor2) == 4
-@test keys(fp.regressor1[1]) == (:coefficients, :intercept)
-@test keys(fp.regressor2[1]) == (:tree,)
-
-
-## ISSUE #377
-
-stand1 = Standardizer()
-stand2 = Standardizer()
-
-Xraw = (x=[-2.0, 0.0, 2.0],)
-X = source(Xraw)
-
-mach1 = machine(stand1, X)
-X2 = transform(mach1, X)
-
-mach2 = machine(stand2, X2)
-X3 = transform(mach2, X2)
-
-@nodepwarn_from_network machine(Unsupervised(), X; transform=X3, depwarn=false) begin
-    mutable struct CompositeZ
-        s1=stand1
-        s2=stand2
-    end
-end
-
-# check no problems with network:
-fit!(X3)
-@test X3().x ≈ [-1.0, 0.0, 1.0]
-
-# instantiate with identical (===) models in two places:
-model = CompositeZ(s1=stand1, s2=stand1)
-mach = machine(model, Xraw)
-@test_logs((:warn, MLJBase.WARN_NETWORK_MACHINES_DEPRECATION),
-           (:error, MLJBase.logerr_identical_models([:s1, :s2], model)),
-           (:error, r"Problem"),
-           (:info, r"Running"),
-           (:info, r"Type checks okay"),
-           @test_throws(MLJBase.ERR_IDENTICAL_MODELS,
-                        fit!(mach, verbosity=-1)))
-
-
-## SOURCE NODES THAT ARE ALSO OPERATION NODES
-
-stand = Standardizer()
-
-Xs = source()
-mach1 = machine(stand, Xs)
-X2 = transform(mach1, Xs)
-
-network_mach = machine(Unsupervised(), Xs, transform=X2, inverse_transform=Xs, depwarn=false)
-
-@nodepwarn_from_network network_mach begin
-    struct AppleComposite
-        standardizer = stand
-    end
-end
-
-X = (x = Float64[1, 2, 3],)
-mach = machine(AppleComposite(), X)
-@test_logs((:warn, r""), fit!(mach, verbosity=0, force=true))
-@test transform(mach, X).x ≈ Float64[-1, 0, 1]
-@test inverse_transform(mach, X) == X
-
-end
-
-true
diff --git a/test/composition/models/deprecated_methods.jl b/test/composition/models/deprecated_methods.jl
deleted file mode 100644
index 4cd7c907..00000000
--- a/test/composition/models/deprecated_methods.jl
+++ /dev/null
@@ -1,459 +0,0 @@
-module TestCompositesCore
-
-using Test
-using MLJBase
-using Tables
-import MLJBase
-using ..Models
-using ..TestUtilities
-using CategoricalArrays
-using OrderedCollections
-import Random.seed!
-seed!(1234)
-
-const depwarn=false
-
-mutable struct Rubbish <: DeterministicComposite
-    model_in_network
-    model_not_in_network
-    some_other_variable
-end
-
-knn = KNNRegressor()
-model = Rubbish(knn, OneHotEncoder(), 42)
-X, y = make_regression(10, 2)
-
-@testset "logic for composite model update - fallback()" begin
-    Xs = source(X)
-    ys = source(y)
-    mach0 = machine(Standardizer(), Xs)
-    W = transform(mach0, Xs)
-    mach1 = machine(model.model_in_network, W, ys)
-    yhat = predict(mach1, W)
-    mach = machine(Deterministic(), Xs, ys; predict=yhat, depwarn)
-    fitresult, cache, _ = return!(mach, model, 0; depwarn)
-    network_model_names = getfield(fitresult, :network_model_names)
-    @test network_model_names == [:model_in_network, nothing]
-    old_model = cache.old_model
-    glb_node = MLJBase.glb(mach)
-    @test !MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-    # don't fallback if mutating field for a network model:
-    model.model_in_network.K = 24
-    @test !MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-    # do fallback if replacing field for a network model:
-    model.model_in_network = KNNRegressor()
-    @test MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-    # return to original state:
-    model.model_in_network = knn
-    @test !MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-    # do fallback if a non-network field changes:
-    model.model_not_in_network.features = [:x1,]
-    @test MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-    # return to original state:
-    model.model_not_in_network = OneHotEncoder()
-    @test !MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-    # do fallback if any non-model changes:
-    model.some_other_variable = 123412
-    @test MLJBase.fallback(model, old_model, network_model_names, glb_node)
-
-end
-
-model = Rubbish(KNNRegressor(), Standardizer(), 42)
-
-function MLJBase.fit(model::Rubbish, verbosity, X, y)
-    Xs = source(X)
-    ys = source(y)
-    mach1 = machine(model.model_in_network, Xs, ys)
-    yhat = predict(mach1, Xs)
-    mach = machine(Deterministic(), Xs, ys; predict=yhat)
-    return!(mach, model, verbosity; depwarn)
-end
-
-# `model` is instance of `Rubbish`
-mach = fit!(machine(model, X, y), verbosity=0)
-
-@testset "logic for composite model update - fit!" begin
-
-    # immediately refit:
-    @test_model_sequence(fit!(mach), [(:skip, model), ])
-
-    # mutate a field for a network model:
-    model.model_in_network.K = 24
-    @test_model_sequence(fit!(mach),
-                         [(:update, model), (:update, model.model_in_network)])
-
-    # immediately refit:
-    @test_model_sequence(fit!(mach), [(:skip, model), ])
-
-    # replace a field for a network model:
-    model.model_in_network = KNNRegressor()
-    @test_model_sequence(fit!(mach),
-                         [(:update, model), (:train, model.model_in_network)])
-
-    # immediately refit:
-    @test_model_sequence(fit!(mach), [(:skip, model), ])
-
-    # mutate a field for a model not in network:
-    model.model_not_in_network.features = [:x1,]
-    @test_model_sequence(fit!(mach),
-                         [(:update, model), (:train, model.model_in_network)])
-
-    # immediately refit:
-    @test_model_sequence(fit!(mach), [(:skip, model), ])
-
-    # mutate some field that is not a model:
-    model.some_other_variable = 123412
-    @test_model_sequence(fit!(mach),
-                         [(:update, model), (:train, model.model_in_network)])
-end
-
-N = 50
-Xin = (a=rand(N), b=rand(N), c=rand(N));
-yin = rand(N);
-
-train, test = partition(eachindex(yin), 0.7);
-Xtrain = MLJBase.selectrows(Xin, train);
-ytrain = yin[train];
-
-ridge_model = FooBarRegressor(lambda=0.1)
-selector_model = FeatureSelector()
-
-mutable struct WrappedRidge <: DeterministicComposite
-    ridge
-end
-
-# julia bug? If I return the following test to a @testset block, then
-# the test marked with ******* fails (bizarre!)
-#@testset "second test of hand-exported network" begin
-function MLJBase.fit(model::WrappedRidge, verbosity::Integer, X, y)
-    Xs = source(X)
-    ys = source(y)
-
-    stand = Standardizer()
-    standM = machine(stand, Xs)
-    W = transform(standM, Xs)
-
-    boxcox = UnivariateBoxCoxTransformer()
-    boxcoxM = machine(boxcox, ys)
-    z = transform(boxcoxM, ys)
-
-    ridgeM = machine(model.ridge, W, z)
-    zhat = predict(ridgeM, W)
-    yhat = inverse_transform(boxcoxM, zhat)
-
-    mach = machine(Deterministic(), Xs, ys; predict=yhat)
-    return!(mach, model, verbosity; depwarn)
-end
-
-MLJBase.input_scitype(::Type{<:WrappedRidge}) =
-    Table(Continuous)
-MLJBase.target_scitype(::Type{<:WrappedRidge}) =
-    AbstractVector{<:Continuous}
-
-ridge = FooBarRegressor(lambda=0.1)
-model_ = WrappedRidge(ridge)
-mach = machine(model_, Xin, yin)
-id = objectid(mach)
-fit!(mach, verbosity=0)
-@test  objectid(mach) == id  # *********
-yhat=predict(mach, Xin);
-ridge.lambda = 1.0
-fit!(mach, verbosity=0)
-@test predict(mach, Xin) != yhat
-
-#end
-
-# A dummy clustering model:
-mutable struct DummyClusterer <: Unsupervised
-    n::Int
-end
-DummyClusterer(; n=3) = DummyClusterer(n)
-function MLJBase.fit(model::DummyClusterer, verbosity::Int, X)
-    Xmatrix = Tables.matrix(X)
-    n = min(size(Xmatrix, 2), model.n)
-    centres = Xmatrix[1:n, :]
-    levels = categorical(1:n)
-    report = (centres=centres,)
-    fitresult = levels
-    return fitresult, nothing, report
-end
-MLJBase.transform(model::DummyClusterer, fitresult, Xnew) =
-    selectcols(Xnew, 1:length(fitresult))
-MLJBase.predict(model::DummyClusterer, fitresult, Xnew) =
-    [fill(fitresult[1], nrows(Xnew))...]
-
-# A wrap of above model:
-mutable struct WrappedDummyClusterer <: UnsupervisedComposite
-    model
-end
-WrappedDummyClusterer(; model=DummyClusterer()) =
-    WrappedDummyClusterer(model)
-
-@testset "third test of hand-exported network" begin
-    function MLJBase.fit(model::WrappedDummyClusterer, verbosity::Int, X)
-        Xs = source(X)
-        W = transform(machine(OneHotEncoder(), Xs), Xs)
-        m = machine(model.model, W)
-        yhat = predict(m, W)
-        Wout = transform(m, W)
-        foo = node(η -> first(η), yhat)
-        mach = machine(Unsupervised(),
-                       Xs;
-                       predict=yhat,
-                       transform=Wout,
-                       report=(foo=foo,))
-        return!(mach, model, verbosity; depwarn)
-    end
-    X, _ = make_regression(10, 5);
-    model = WrappedDummyClusterer(model=DummyClusterer(n=2))
-    mach = fit!(machine(model, X), verbosity=0)
-    model.model.n = 3
-    fit!(mach, verbosity=0)
-    @test transform(mach, X) == selectcols(X, 1:3)
-    r = report(mach)
-    @test r.model.centres == MLJBase.matrix(X)[1:3,:]
-    @test r.foo == predict(mach, rows=:)[1]
-    fp = fitted_params(mach)
-    @test :model in keys(fp)
-    levs = fp.model.fitresult
-    @test predict(mach, X) == fill(levs[1], 10)
-end
-
-
-## NETWORK WITH MULTIPLE NODES REPORTING STATE/ REFIT
-
-mutable struct TwoStages <: DeterministicComposite
-    model1
-    model2
-    model3
-end
-
-function MLJBase.fit(m::TwoStages, verbosity, X, y)
-    Xs = source(X)
-    ys = source(y)
-    mach1 = machine(m.model1, Xs, ys)
-    mach2 = machine(m.model2, Xs, ys)
-    ypred1 = MLJBase.predict(mach1, Xs)
-    ypred2 = MLJBase.predict(mach2, Xs)
-    Y = MLJBase.table(hcat(ypred1, ypred2))
-    mach3 = machine(m.model3, Y, ys)
-    ypred3 = MLJBase.predict(mach3, Y)
-    μpred = node(x->mean(x), ypred3)
-    σpred = node((x, μ)->mean((x.-μ).^2), ypred3, μpred)
-    mach = machine(Deterministic(),
-                   Xs,
-                   ys;
-                   predict=ypred3,
-                   report=(μpred=μpred,
-                           σpred=σpred))
-    return!(mach, m, verbosity; depwarn)
-end
-
-@testset "Test exported-network with multiple saved nodes and refit" begin
-    X, y = make_regression(100, 3)
-    model3 = FooBarRegressor(lambda=1)
-    twostages = TwoStages(FooBarRegressor(lambda=0.1),
-                          FooBarRegressor(lambda=10), model3)
-    mach = machine(twostages, X, y)
-    fit!(mach, verbosity=0)
-    rep = report(mach)
-    # All machines have been fitted once
-    @test rep.machines[1].state ==
-        rep.machines[2].state ==
-        rep.machines[3].state == 1
-    # Retrieve current values of interest
-    μpred = rep.μpred
-    σpred = rep.σpred
-    # Change model3 and refit
-    model3.lambda = 10
-    fit!(mach, verbosity=0)
-    rep = report(mach)
-    # Machines 1,2 have been fitted once and machine 3 twice
-    @test rep.machines[1].state == rep.machines[2].state == 1
-    @test rep.machines[3].state == 2
-    # The new values have been updated
-    @test rep.μpred != μpred
-    @test rep.σpred != σpred
-end
-
-## COMPOSITE WITH COMPONENT MODELS STORED IN NTUPLE
-
-# `modelnames` is a tuple of `Symbol`s, one for each `model` in `models`:
-mutable struct Averager{modelnames} <: DeterministicComposite
-    models::NTuple{<:Any,Deterministic}
-    weights::Vector{Float64}
-    Averager(modelnames, models, weights) =
-        new{modelnames}(models, weights)
-end
-
-# special kw constructor, allowing one to specify the property names
-# to be attributed to each component model (see below):
-function Averager(; weights=Float64[], named_models...)
-        nt = NamedTuple(named_models)
-    modelnames = keys(nt)
-    models = values(nt)
-    return Averager(modelnames, models, weights)
-end
-
-# for example:
-averager = Averager(weights=[1, 1],
-                    model1=KNNRegressor(K=3),
-                    model2=RidgeRegressor())
-
-# so we can do `averager.model1` and `averager.model2`:
-Base.propertynames(::Averager{modelnames}) where modelnames =
-        tuple(:weights, modelnames...)
-function Base.getproperty(averager::Averager{modelnames},
-                          name::Symbol) where modelnames
-    name === :weights && return getfield(averager, :weights)
-    models = getfield(averager, :models)
-    for j in eachindex(modelnames)
-        name === modelnames[j] && return models[j]
-    end
-    error("type Averager has no field $name")
-end
-
-# overload multiplication of a node by a matrix:
-import Base.*
-*(preds::Node, weights) = node(p->p*weights, preds)
-
-# learning network wrapped in a fit method:
-function MLJBase.fit(averager::Averager{modelnames},
-                     verbosity,
-                     X,
-                     y) where modelnames
-
-    Xs = source(X)
-    ys = source(y)
-
-    weights = averager.weights
-
-    machines = [machine(getproperty(averager, name), Xs, ys) for
-                name in modelnames]
-    predictions = hcat([predict(mach, Xs) for mach in machines]...)
-    yhat = (1/sum(weights))*(predictions*weights)
-
-    mach = machine(Deterministic(), Xs, ys; predict=yhat)
-    return!(mach, averager, verbosity; depwarn)
-end
-
-@testset "composite with component models stored in ntuple" begin
-    X, y = make_regression(10, 3);
-    mach = machine(averager, X, y)
-    fit!(mach, verbosity=0)
-    fp = fitted_params(mach)
-    @test keys(fp.model1) == (:tree, )
-    @test keys(fp.model2) == (:coefficients, :intercept)
-    r = report(mach)
-    @test isnothing(r.model1)
-    @test isnothing(r.model2)
-    range(averager, :(model1.K), lower=2, upper=3)
-end
-
-
-## DATA FRONT-END IN AN EXPORTED LEARNING NETWORK
-
-mutable struct Scale <: MLJBase.Static
-    scaling::Float64
-end
-
-function MLJBase.transform(s::Scale, _, X)
-    X isa AbstractVecOrMat && return X * s.scaling
-    MLJBase.table(s.scaling * MLJBase.matrix(X), prototype=X)
-end
-
-function MLJBase.inverse_transform(s::Scale, _, X)
-    X isa AbstractVecOrMat && return X / s.scaling
-    MLJBase.table(MLJBase.matrix(X) / s.scaling, prototype=X)
-end
-
-mutable struct ElephantModel <: ProbabilisticComposite
-    scaler
-    clf
-    cache::Bool
-end
-
-function MLJBase.fit(model::ElephantModel, verbosity, X, y)
-
-    Xs = source(X)
-    ys = source(y)
-
-    scaler = model.scaler
-    mach1 = machine(scaler, cache=model.cache)
-    W = transform(mach1, Xs)
-
-    # a classifier with reformat front-end:
-    clf = model.clf
-    mach2 = machine(clf, W, ys, cache=model.cache)
-    yhat = predict(mach2, W)
-
-    mach = machine(Probabilistic(), Xs, ys, predict=yhat)
-    return!(mach, model, verbosity; depwarn)
-end
-
-@testset "reformat/selectrows logic in composite model" begin
-
-    X = (x1=ones(5), x2=ones(5))
-    y = categorical(collect("abaaa"))
-    model = ElephantModel(Scale(2.0),
-                        ConstantClassifier(testing=true, bogus=1.0),
-                        true)
-    mach = machine(model, X, y, cache=false)
-
-    @test_logs((:warn, MLJBase.WARN_NETWORK_MACHINES_DEPRECATION),
-               (:info, "reformatting X, y"),
-               (:info, "resampling X, y"),
-               fit!(mach, verbosity=0, rows=1:3)
-               )
-    @test mach.state == 1
-
-    # new clf hyperparmater (same rows) means no reformatting or resampling:
-    model.clf.bogus = 10
-    @test_logs fit!(mach, verbosity=0, rows=1:3)
-    @test mach.state == 2
-
-    # however changing an upstream hyperparameter forces reformatting
-    # and resampling:
-    model.scaler.scaling = 3.1
-    @test_logs((:info, "reformatting X, y"),
-               (:info, "resampling X, y"),
-               fit!(mach, verbosity=0, rows=1:3))
-
-end
-
-@testset "operation nodes that are source nodes" begin
-
-    mutable struct BananaComposite <: UnsupervisedComposite
-        stand
-    end
-    BananaComposite(; stand=Standardizer()) = BananaComposite(stand)
-
-    function MLJBase.fit(model::BananaComposite, verbosity, X)
-
-        Xs = source(X)
-        mach1 = machine(model.stand, Xs)
-        X2 = transform(mach1, Xs)
-
-        # node for the inverse_transform:
-
-        network_mach = machine(Unsupervised(), Xs, transform=X2, inverse_transform=Xs)
-        return!(network_mach, model, verbosity; depwarn)
-
-    end
-
-    X = (x = Float64[1, 2, 3],)
-    mach = machine(BananaComposite(), X)
-    fit!(mach, verbosity=0, force=true)
-    @test transform(mach, X).x ≈ Float64[-1, 0, 1]
-    @test inverse_transform(mach, X) == X
-
-end
-
-end # module
-true
diff --git a/test/composition/models/network_composite.jl b/test/composition/models/network_composite.jl
index 87e064df..26f0d4c6 100644
--- a/test/composition/models/network_composite.jl
+++ b/test/composition/models/network_composite.jl
@@ -1,4 +1,4 @@
-module TestNetowrkComposite
+module TestNetworkComposite
 
 using Test
 using MLJBase
@@ -9,6 +9,7 @@ using Tables
 using MLJModelInterface
 using CategoricalArrays
 using OrderedCollections
+using StatisticalMeasures
 using Serialization
 
 const MMI = MLJModelInterface
@@ -645,6 +646,39 @@ end
 
 end
 
+# # STATIC MODEL WITH MULTIPLE INPUTS
+
+mutable struct Balancer <: Static end  # toy static transformer taking two inputs
+MLJBase.transform(::Balancer, _, X, y) = map(data -> selectrows(data, 1:2), (X, y))
+# composite whose single component model is stored in the `balancer` field:
+struct ThinWrapper <: StaticNetworkComposite
+    balancer
+end
+
+function MLJBase.prefit(wrapper::ThinWrapper, verbosity)
+    # empty source because there is no training data; presumably it receives the
+    # `(X, y)` tuple at transform time, hence the `first`/`last` nodes below:
+    inputs = source()
+    Xs = first(inputs)
+    ys = last(inputs)
+
+    # the symbol `:balancer` matches the name of the `ThinWrapper` field:
+    balancer_mach = machine(:balancer)
+    balanced = transform(balancer_mach, Xs, ys)
+
+    return (; transform = balanced)
+end
+
+balancer = Balancer()
+wrapper = ThinWrapper(balancer)
+
+X, y = make_blobs()
+mach = machine(wrapper)  # constructed without training data; no `fit!` call follows
+Xunder, yunder = transform(mach, X, y)  # both inputs are passed through the wrapper
+@test Xunder == selectrows(X, 1:2)  # same rows as `Balancer`'s `transform` selects
+@test yunder == selectrows(y, 1:2)
+
+
 
 # # MACHINE INTEGRATION TESTS
 
@@ -795,7 +829,7 @@ end
     # Test data as been erased at the first and second level of composition
     for submach in machines(glb(smach.fitresult))
         TestUtilities.test_data(submach)
-        if submach isa Machine{<:Composite}
+        if submach isa Machine{<:NetworkComposite}
             for subsubmach in machines(glb(submach.fitresult))
                 TestUtilities.test_data(subsubmach)
             end
diff --git a/test/composition/models/pipelines.jl b/test/composition/models/pipelines.jl
index 8fb793ee..faaf3d4c 100644
--- a/test/composition/models/pipelines.jl
+++ b/test/composition/models/pipelines.jl
@@ -113,7 +113,6 @@ end
     @test_logs @test Pipeline(m, t, u, d, u) isa DeterministicPipeline
 
     # named components:
-    @test_throws MLJBase.ERR_USING_TARGET_KWARG Pipeline(target=u)
     @test Pipeline(c1=m, c2=t, c3=u) isa UnsupervisedPipeline
     @test Pipeline(c1=m, c2=t, c3=u, c5=p) isa ProbabilisticPipeline
     @test Pipeline(c1=m, c2=t) isa StaticPipeline
diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl
index 6cbe6588..ca973775 100644
--- a/test/composition/models/stacking.jl
+++ b/test/composition/models/stacking.jl
@@ -2,11 +2,11 @@ module TestStacking
 
 using Test
 using MLJBase
+using StatisticalMeasures
 using MLJModelInterface
 using ..Models
 using Random
 using StableRNGs
-
 import Distributions
 
 rng = StableRNGs.StableRNG(1234)
@@ -31,7 +31,7 @@ function test_internal_evaluation(internalreport, std_evaluation, modelnames)
         @test model_ev isa PerformanceEvaluation
         @test model_ev.per_fold == std_ev.per_fold
         @test model_ev.measurement == std_ev.measurement
-        @test model_ev.per_observation[1] === std_ev.per_observation[1] === missing
+        @test model_ev.per_observation[1] == std_ev.per_observation[1]
         @test model_ev.per_observation[2] == std_ev.per_observation[2]
         @test model_ev.operation == std_ev.operation
         @test model_ev.report_per_fold == std_ev.report_per_fold
diff --git a/test/composition/models/static_transformers.jl b/test/composition/models/static_transformers.jl
index c0162950..072dcbca 100644
--- a/test/composition/models/static_transformers.jl
+++ b/test/composition/models/static_transformers.jl
@@ -5,6 +5,7 @@ using Test
 using MLJBase
 using ..Models
 using CategoricalArrays
+using StatisticalMeasures
 import Random.seed!
 seed!(1234)
 
diff --git a/test/composition/models/transformed_target_model.jl b/test/composition/models/transformed_target_model.jl
index 12b1391b..b640f922 100644
--- a/test/composition/models/transformed_target_model.jl
+++ b/test/composition/models/transformed_target_model.jl
@@ -18,10 +18,6 @@ whitener = UnivariateStandardizer()
         TransformedTargetModel(atom),
     )
     @test_logs TransformedTargetModel(atom, transformer=UnivariateStandardizer)
-    model = @test_logs(
-        (:warn, MLJBase.WARN_TARGET_DEPRECATED),
-        TransformedTargetModel(atom, target=whitener),
-    )
     model = @test_logs TransformedTargetModel(atom, transformer=whitener)
     @test model.model == atom
     @test model.inverse == nothing
diff --git a/test/default_measures.jl b/test/default_measures.jl
new file mode 100644
index 00000000..28a28b5d
--- /dev/null
+++ b/test/default_measures.jl
@@ -0,0 +1,42 @@
+# deterministic regressor:
+mutable struct DRegressor <: Deterministic end
+MLJBase.target_scitype(::Type{<:DRegressor}) = AbstractVector{<:Union{Missing,Continuous}}
+
+# second deterministic regressor, declaring the same target scitype as `DRegressor`:
+mutable struct D2Regressor <: Deterministic end
+MLJBase.target_scitype(::Type{<:D2Regressor}) = AbstractVector{<:Union{Missing,Continuous}}
+
+# deterministic classifier:
+mutable struct DClassifier <: Deterministic end
+MLJBase.target_scitype(::Type{<:DClassifier}) = AbstractVector{<:Union{Missing,Finite}}
+
+# deterministic model with a `Textual` target (no default measure is expected for it):
+mutable struct DClassifierWeird <: Deterministic end
+MLJBase.target_scitype(::Type{<:DClassifierWeird}) = AbstractVector{<:Textual}
+
+# probabilistic classifier:
+mutable struct PClassifier <: Probabilistic end
+MLJBase.target_scitype(::Type{<:PClassifier}) = AbstractVector{<:Union{Missing,Finite}}
+
+# probabilistic regressor with a continuous target:
+mutable struct PRegressor <: Probabilistic end
+MLJBase.target_scitype(::Type{<:PRegressor}) = AbstractVector{<:Union{Missing,Continuous}}
+
+# probabilistic regressor with a count target:
+mutable struct PCountRegressor <: Probabilistic end
+MLJBase.target_scitype(::Type{<:PCountRegressor}) = AbstractVector{<:Union{Missing,Count}}
+
+
+
+@testset "default_measure" begin
+    for (model, expected) in (
+        DRegressor() => l2, D2Regressor() => l2, DClassifier() => misclassification_rate,
+        PClassifier() => log_loss, PRegressor() => log_loss, PCountRegressor() => log_loss,
+    )
+        @test MLJBase.default_measure(model) == expected
+    end
+    @test isnothing(MLJBase.default_measure(DClassifierWeird()))
+    @test isnothing(MLJBase.default_measure("junk"))
+end
+
+true
diff --git a/test/hyperparam/one_dimensional_ranges.jl b/test/hyperparam/one_dimensional_ranges.jl
index 5f7506ea..1567f91e 100644
--- a/test/hyperparam/one_dimensional_ranges.jl
+++ b/test/hyperparam/one_dimensional_ranges.jl
@@ -116,5 +116,16 @@ end
                range(any1, :any, lower=1, upper=10))
 end
 
+@testset "coverage" begin
+    # `show` of a numeric range prints its bounds, origin and unit:
+    numeric = range(Int, :junk, lower=1, upper=10)
+    shown = sprint(show, numeric)
+    @test shown == "NumericRange(1 ≤ junk ≤ 10; origin=5.5, unit=4.5)"
+
+    # `show` of a nominal range prints its values:
+    nominal = range(Char, :junk, values=['c', 'd'])
+    @test sprint(show, nominal) == "NominalRange(junk = c, d)"
+end
+
 end
 true
diff --git a/test/interface/model_api.jl b/test/interface/model_api.jl
index 9bf3e0bf..8966f70f 100644
--- a/test/interface/model_api.jl
+++ b/test/interface/model_api.jl
@@ -2,6 +2,7 @@ module TestModelAPI
 
 using Test
 using MLJBase
+using StatisticalMeasures
 import MLJModelInterface
 using ..Models
 using Distributions
@@ -77,7 +78,7 @@ UnivariateFiniteFitter(;alpha=1.0) = UnivariateFiniteFitter(alpha)
     yhat = predict(mach, nothing) # single UnivariateFinite distribution
 
     @test cross_entropy(fill(yhat, 3), ytest) ≈
-        [-log(1/2), -log(1/2), -log(1/4)]
+        mean([-log(1/2), -log(1/2), -log(1/4)])
 
 end
 
diff --git a/test/machines.jl b/test/machines.jl
index 16655d26..7d0845c2 100644
--- a/test/machines.jl
+++ b/test/machines.jl
@@ -7,6 +7,7 @@ using ..Models
 using StableRNGs
 using Serialization
 using ..TestUtilities
+using StatisticalMeasures
 
 const MLJModelInterface = MLJBase.MLJModelInterface
 const MMI = MLJModelInterface
diff --git a/test/measures/confusion_matrix.jl b/test/measures/confusion_matrix.jl
deleted file mode 100644
index 3e7d9b7f..00000000
--- a/test/measures/confusion_matrix.jl
+++ /dev/null
@@ -1,116 +0,0 @@
-using Test
-using MLJBase
-include(joinpath("..", "..", "test", "_models", "models.jl"))
-using .Models
-
-@testset "_categorical" begin
-    a = [1, 1, 2, 3]
-    b = [3, 3, 4, 5]
-    c = [missing, a...]
-    d = [missing, b...]
-    e = categorical(a)
-    f = categorical(b)
-    g = categorical(c)
-    h = categorical(d)
-    j = CategoricalArrays.CategoricalValue{Int64, UInt32}[e[1], e[1], e[1], e[1]]
-    k = CategoricalArrays.CategoricalValue{Int64, UInt32}[e[4], e[4], e[4], e[4]]
-    rhs = (Set(1:5), Set(1:5))
-    @test Set.(levels.(MLJBase._categorical(a, b))) == rhs
-    @test Set.(levels.(MLJBase._categorical(a, d))) == rhs
-    @test Set.(levels.(MLJBase._categorical(c, b))) == rhs
-    @test Set.(levels.(MLJBase._categorical(c, d))) == rhs
-    @test Set.(levels.(MLJBase._categorical(a, f))) == rhs
-    @test Set.(levels.(MLJBase._categorical(a, h))) == rhs
-    @test Set.(levels.(MLJBase._categorical(b, a))) == rhs
-    @test Set.(levels.(MLJBase._categorical(d, a))) == rhs
-    @test Set.(levels.(MLJBase._categorical(b, c))) == rhs
-    @test Set.(levels.(MLJBase._categorical(d, c))) == rhs
-    @test Set.(levels.(MLJBase._categorical(f, a))) == rhs
-    @test Set.(levels.(MLJBase._categorical(h, a))) == rhs
-
-    @test Set.(levels.(MLJBase._categorical(j, k))) == (Set(1:3), Set(1:3))
-
-    # case of ordinary vector with CategoricalValue eltype:
-    acv = CategoricalArrays.CategoricalVector
-end
-
-@testset "basics" begin
-    yraw = ['m',     'm', 'f', 'n', missing, 'f', 'm', 'n', 'n', 'm', 'f']
-    ŷraw = [missing, 'f', 'f', 'm', 'f',     'f', 'n', 'm', 'n', 'm', 'f']
-    y = categorical(yraw)
-    ŷ = categorical(ŷraw)
-    l = levels(y) # f, m, n
-    cm = MLJBase._confmat(ŷ, y; warn=false)
-    ŷ_clean, y_clean = MLJBase.skipinvalid(ŷ, y)
-    ee(l,i,j) = sum((ŷ_clean .== l[i]) .& (y_clean .== l[j]))
-    for i in 1:3, j in 1:3
-        @test cm[i,j] == ee(l,i,j)
-    end
-
-    cm2 = @test_logs (:warn, r"The classes are") MLJBase._confmat(ŷraw, yraw)
-    @test cm2.mat == cm.mat
-
-    perm = [3, 1, 2]
-    l2 = l[perm]
-    cm2 = @test_logs MLJBase._confmat(ŷ, y; perm=perm)
-    m = ConfusionMatrix(perm=perm)
-    for i in 1:3, j in 1:3
-        @test cm2[i,j] == ee(l2,i,j)
-    end
-    @test_logs (:warn, r"The classes are un") MLJBase._confmat(ŷ, y)
-    ŷc = coerce(ŷ, Union{Missing,OrderedFactor})
-    yc = coerce(y, Union{Missing,OrderedFactor})
-    @test MLJBase._confmat(ŷc, yc).mat == cm.mat
-
-    y = categorical(['a','b','a','b'])
-    ŷ = categorical(['b','b','a','a'])
-    @test_logs (:warn, r"The classes are un") MLJBase._confmat(ŷ, y)
-
-    # more tests for coverage
-    y = categorical([1,2,3,1,2,3,1,2,3])
-    ŷ = categorical([1,2,3,1,2,3,1,2,3])
-    @test_throws ArgumentError MLJBase._confmat(ŷ, y, rev=true)
-
-    # silly test for display
-    ŷ = coerce(y, OrderedFactor)
-    y = coerce(y, OrderedFactor)
-    iob = IOBuffer()
-    Base.show(iob, MIME("text/plain"), MLJBase._confmat(ŷ, y))
-    siob = String(take!(iob))
-    @test strip(siob) == strip("""
-              ┌──────────────┐
-              │ Ground Truth │
-    ┌─────────┼────┬────┬────┤
-    │Predicted│ 1  │ 2  │ 3  │
-    ├─────────┼────┼────┼────┤
-    │    1    │ 3  │ 0  │ 0  │
-    ├─────────┼────┼────┼────┤
-    │    2    │ 0  │ 3  │ 0  │
-    ├─────────┼────┼────┼────┤
-    │    3    │ 0  │ 0  │ 3  │
-    └─────────┴────┴────┴────┘""")
-end
-
-@testset "ConfusionMatrix measure" begin
-
-    @test info(confmat).orientation == :other
-    model = DeterministicConstantClassifier()
-
-    X = (x=rand(10),)
-    long = categorical(collect("abbaacaabbbbababcbac"), ordered=true)
-    y = long[1:10]
-    yhat =long[11:20]
-
-    @test confmat(yhat, y).mat == [1 2 0; 3 1 1; 1 1 0]
-    @test ConfusionMatrix(perm=[2, 1, 3])(yhat, y).mat ==
-        MLJBase._confmat(yhat, y, perm=[2, 1, 3]).mat
-
-    MLJBase.value(confmat, yhat, X, y, nothing)
-
-    e = evaluate(model, X, y,
-                 measures=[misclassification_rate, confmat],
-                 resampling=Holdout(fraction_train=0.5))
-    cm = e.measurement[2]
-    @test cm.labels == ["a", "b", "c"]
-    @test cm.mat == [2 2 1; 0 0 0; 0 0 0]
-end
diff --git a/test/measures/continuous.jl b/test/measures/continuous.jl
deleted file mode 100644
index 3e645845..00000000
--- a/test/measures/continuous.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-rng = StableRNG(666899)
-
-@testset "regressor measures" begin
-    y    = [1, 42,  2, 3, missing, 4]
-    yhat = [4, NaN, 3, 2, 42,      1]
-    w =    [1, 42,  2, 4, 42,      3]
-    y    = [1,  2, 3, 4]
-    yhat = [4, 3, 2,      1]
-    w =    [1,  2, 4,      3]
-    @test isapprox(mae(yhat, y), 2)
-    @test isapprox(mae(yhat, y, w), (1*3 + 2*1 + 4*1 + 3*3)/4)
-    @test isapprox(rms(yhat, y), sqrt(5))
-    @test isapprox(rms(yhat, y, w), sqrt((1*3^2 + 2*1^2 + 4*1^2 + 3*3^2)/4))
-    @test rsq(yhat, y) == -3
-    @test isapprox(mean(skipinvalid(l1(yhat, y))), 2)
-    @test isapprox(mean(skipinvalid(l1(yhat, y, w))), mae(yhat, y, w))
-    @test isapprox(mean(skipinvalid(l2(yhat, y))), 5)
-    @test isapprox(mean(skipinvalid(l2(yhat, y, w))), rms(yhat, y, w)^2)
-    @test isapprox(mean(skipinvalid(log_cosh(yhat, y))), 1.3715546675)
-
-    y    = [1, 42,  2, 3, missing, 4]
-    yhat = [2, NaN, 3, 4, 42,      5]
-    @test isapprox(rmsl(yhat, y),
-                   sqrt((log(1/2)^2 + log(2/3)^2 + log(3/4)^2 + log(4/5)^2)/4))
-    @test isapprox(rmslp1(yhat, y),
-                   sqrt((log(2/3)^2 + log(3/4)^2 + log(4/5)^2 + log(5/6)^2)/4))
-    @test isapprox(rmsp(yhat, y), sqrt((1 + 1/4 + 1/9 + 1/16)/4))
-    @test isapprox(mape(yhat, y), (1/1 + 1/2 + 1/3 + 1/4)/4)
-end
-
-true
diff --git a/test/measures/doc_strings.jl b/test/measures/doc_strings.jl
deleted file mode 100644
index 1cbf96c4..00000000
--- a/test/measures/doc_strings.jl
+++ /dev/null
@@ -1,9 +0,0 @@
-using MLJBase
-
-docstring = (Base.Docs.doc)((Base.Docs.Binding)(Main, :multiclass_recall))
-
-@test string(docstring) == "An instance of type "*
-    "[`MulticlassTruePositiveRate`](@ref). Query the "*
-    "[`MulticlassTruePositiveRate`](@ref) doc-string for details. \n"
-
-true
diff --git a/test/measures/finite.jl b/test/measures/finite.jl
deleted file mode 100644
index f06266c3..00000000
--- a/test/measures/finite.jl
+++ /dev/null
@@ -1,609 +0,0 @@
-rng = StableRNG(51803)
-
-const Vec = AbstractVector
-
-@testset "misclassification_rate" begin
-    y    = categorical(collect("asdfasdfaaassdd"))
-    yhat = categorical(collect("asdfaadfaasssdf"))
-    w = 1:15
-    ym = vcat(y, [missing,])
-    yhatm = vcat(yhat, [missing,])
-    wm = 1:16
-    @test misclassification_rate(yhat, y) ≈ 0.2
-    @test misclassification_rate(yhatm, ym) ≈ 0.2
-    @test misclassification_rate(yhat, y, w) ≈ (6*1 + 11*1 + 15*1) / 15
-    @test misclassification_rate(yhatm, ym, wm) ≈ (6*1 + 11*1 + 15*1) / 15
-end
-
-@testset "mcr, acc, bacc, mcc" begin
-    y = categorical(['m', 'f', 'n', 'f', 'm', 'n', 'n', 'm', 'f'])
-    ŷ = categorical(['f', 'f', 'm', 'f', 'n', 'm', 'n', 'm', 'f'])
-    @test accuracy(ŷ, y) == 1-mcr(ŷ,y) ==
-        accuracy(MLJBase._confmat(ŷ, y, warn=false))  ==
-        1-mcr(MLJBase._confmat(ŷ, y, warn=false))
-    w = randn(rng,length(y))
-    @test accuracy(ŷ, y, w) == 1-mcr(ŷ,y,w)
-
-    ## balanced accuracy
-    y = categorical([
-        3, 4, 1, 1, 1, 4, 1, 3, 3, 1, 2, 3, 1, 3, 3, 3, 2, 4, 3, 2, 1, 3,
-        3, 1, 1, 1, 2, 4, 1, 4, 4, 4, 1, 1, 4, 4, 3, 1, 2, 2, 3, 4, 2, 1,
-        2, 2, 3, 2, 2, 3, 1, 2, 3, 4, 1, 2, 4, 2, 1, 4, 3, 2, 3, 3, 3, 1,
-        3, 1, 4, 3, 1, 2, 3, 1, 2, 2, 4, 4, 1, 3, 2, 1, 4, 3, 3, 1, 3, 1,
-        2, 2, 2, 2, 2, 3, 2, 1, 1, 4, 2, 2])
-    ŷ = categorical([
-        2, 3, 2, 1, 2, 2, 3, 3, 2, 4, 2, 3, 2, 4, 3, 4, 4, 2, 1, 3, 3, 3,
-        3, 3, 2, 4, 4, 3, 4, 4, 1, 2, 3, 2, 4, 1, 2, 3, 1, 4, 2, 2, 1, 2,
-        3, 2, 2, 4, 3, 2, 2, 2, 1, 2, 2, 1, 3, 1, 4, 1, 2, 1, 2, 4, 3, 2,
-        4, 3, 2, 4, 4, 2, 4, 3, 2, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 3, 4, 2,
-        4, 4, 2, 1, 3, 2, 2, 4, 1, 1, 4, 1])
-    w = [
-        0.5, 1.4, 0.6, 1. , 0.1, 0.5, 1.2, 0.2, 1.8, 0.3, 0.6, 2.2, 0.1,
-        1.4, 0.2, 0.4, 0.6, 2.1, 0.7, 0.2, 0.9, 0.4, 0.7, 0.3, 0.1, 1.7,
-        0.2, 0.7, 1.2, 1. , 0.9, 0.4, 0.5, 0.5, 0.5, 1. , 0.3, 0.1, 0.2,
-        0. , 2.2, 0.8, 0.9, 0.8, 1.3, 0.2, 0.4, 0.7, 1. , 0.7, 1.7, 0.7,
-        1.1, 1.8, 0.1, 1.2, 1.8, 1. , 0.1, 0.5, 0.6, 0.7, 0.6, 1.2, 0.6,
-        1.2, 0.5, 0.5, 0.8, 0.2, 0.6, 1. , 0.3, 1. , 0.2, 1.1, 1.1, 1.1,
-        0.6, 1.4, 1.2, 0.3, 1.1, 0.2, 0.5, 1.6, 0.3, 1. , 0.3, 0.9, 0.9,
-        0. , 0.6, 0.6, 0.4, 0.5, 0.4, 0.2, 0.9, 0.4]
-    sk_bacc = 0.17493386243386244 # note: sk-learn reverses ŷ and y
-    @test bacc(ŷ, y) ≈ sk_bacc
-    sk_adjusted_bacc =  -0.10008818342151675
-    @test BalancedAccuracy(adjusted=true)(ŷ, y) ≈ sk_adjusted_bacc
-    sk_bacc_w = 0.1581913163016446
-    @test bacc(ŷ, y, w) ≈ sk_bacc_w
-    sk_adjusted_bacc_w = -0.1224115782644738
-    @test BalancedAccuracy(adjusted=true)(ŷ, y, w) ≈ sk_adjusted_bacc_w
-
-    ## matthews correlation
-    sk_mcc = -0.09759509982785947
-    @test mcc(ŷ, y) == matthews_correlation(ŷ, y) ≈ sk_mcc
-    # invariance with respect to permutation ?
-    cm = MLJBase._confmat(ŷ, y, perm=[3, 1, 2, 4])
-    @test mcc(cm) ≈ sk_mcc
-
-    # Issue #381
-    cm = MLJBase.ConfusionMatrixObject([29488 13017; 12790 29753], ["0.0", "1.0"])
-    @test mcc(cm) ≈ 0.39312321239417797
-end
-
-@testset "kappa" begin
-    # Binary case
-    y_b = categorical([2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2])
-    ŷ_b = categorical([1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2])
-    cm_b = MLJBase._confmat(y_b, ŷ_b, warn=false)
-    p0_b = (4+10)/30
-    pe_b = (13*11 + 17*19)/(30*30)
-
-    # Multiclass case
-    y_m = categorical([5, 5, 3, 5, 4, 4, 2, 2, 3, 2, 5, 2, 4, 3, 2, 1, 1, 5, 1, 4, 2, 5, 4, 5, 2, 3, 3, 4, 2, 4])
-    ŷ_m = categorical([1, 1, 1, 5, 4, 2, 1, 3, 4, 4, 2, 5, 4, 4, 1, 5, 5, 2, 3, 3, 1, 3, 2, 5, 5, 2, 3, 2, 5, 3])
-    cm_m = MLJBase._confmat(ŷ_m, y_m, warn=false)
-    p0_m = 5/30
-    pe_m = (3*6 + 8*6 + 5*6 + 7*5 + 7*7)/(30*30)
-
-    # Tests
-    @test kappa(y_m, ŷ_m) ≈ (p0_m - pe_m)/(1 - pe_m)
-    @test kappa(y_b, ŷ_b) ≈ (p0_b - pe_b)/(1 - pe_b)
-    @test kappa(cm_m)     == kappa(y_m, ŷ_m)
-    @test kappa(cm_b)     == kappa(y_b, ŷ_b)
-    @test kappa(ŷ_m, y_m) == kappa(y_m, ŷ_m)
-    @test kappa(ŷ_b, y_b) == kappa(y_b, ŷ_b)
-    @test kappa(y_m, y_m) == 1.0
-    @test kappa(y_b, y_b) == 1.0
-end
-
-@testset "confusion matrix {2}" begin
-    # first class is 1 is assumed negative, second positive
-    y = categorical([1, 2, 1, 2, 1, 1, 2])
-    ŷ = categorical([1, 2, 2, 2, 2, 1, 2])
-    cm = MLJBase._confmat(ŷ, y, warn=false)
-    TN = sum(ŷ .== y .== 1) # pred and true = - (1)
-    TP = sum(ŷ .== y .== 2) # pred and true = + (2)
-    FP = sum(ŷ .!= y .== 1) # pred + (2) and true - (1)
-    FN = sum(ŷ .!= y .== 2) # pred - (1) and true + (2)
-    @test cm[1,1] == TN
-    @test cm[2,2] == TP
-    @test cm[1,2] == FN
-    @test cm[2,1] == FP
-
-    ym = categorical([1, missing, 2, 1, 2, 1, 1, 1, 2])
-    ŷm = categorical([1, 2,       2, 2, 2, missing, 2, 1, 2])
-    cm = MLJBase._confmat(ŷ, y, warn=false)
-    TN = sum(skipmissing(ŷ .== y .== 1)) # pred and true = - (1)
-    TP = sum(skipmissing(ŷ .== y .== 2)) # pred and true = + (2)
-    FP = sum(skipmissing(ŷ .!= y .== 1)) # pred + (2) and true - (1)
-    FN = sum(skipmissing(ŷ .!= y .== 2)) # pred - (1) and true + (2)
-    @test cm[1,1] == TN
-    @test cm[2,2] == TP
-    @test cm[1,2] == FN
-    @test cm[2,1] == FP
-
-    cm2 = MLJBase._confmat(ŷ, y; rev=true)
-    @test cm2[1,1] == cm[2,2]
-    @test cm2[1,2] == cm[2,1]
-    @test cm2[2,2] == cm[1,1]
-    @test cm2[2,1] == cm[1,2]
-
-    @test accuracy(ŷ, y) == accuracy(cm) == sum(y .== ŷ) / length(y)
-
-    @test @test_logs((:warn, r"The classes are un-ordered"),
-                     recall(ŷ, y) == TP / (TP + FN))
-
-    ŷ = coerce(ŷ, Union{Missing,OrderedFactor})
-    y = coerce(y, Union{Missing,OrderedFactor})
-
-    @test precision(ŷ, y)   == TP / (TP + FP)
-    @test specificity(ŷ, y) == TN / (TN + FP)
-    @test f1score(ŷ, y) ≈
-        2.0 / (1.0 / recall(ŷ, y) + 1.0 / precision(ŷ, y))
-
-    recall_rev = Recall(rev=true)
-    @test recall_rev(ŷ, y) ==
-        TN / (TN + FP) # no warning because rev is specified
-    precision_rev = Precision(rev=true)
-    @test precision_rev(ŷ, y) == TN / (TN + FN)
-    specificity_rev = Specificity(rev=true)
-    @test specificity_rev(ŷ, y) == TP / (TP + FN)
-    f1score_rev = FScore(rev=true)
-    @test f1score_rev(ŷ, y) ≈
-        2.0 / (1.0 / recall_rev(ŷ, y) + 1.0 / precision_rev(ŷ, y))
-end
-
-@testset "confusion matrix {n}" begin
-    y = coerce([1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 2, 1, 2,
-                            2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1,
-                            2, 2, 2], Multiclass)
-    ŷ = coerce([2, 0, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 2,
-                            1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 2,
-                            1, 2, 2], Multiclass)
-    class_w = Dict(0=>0,2=>2,1=>1)
-    cm = MLJBase._confmat(ŷ, y, warn=false)
-
-    #               ┌─────────────────────────────────────────┐
-    #               │              Ground Truth               │
-    # ┌─────────────┼─────────────┬─────────────┬─────────────┤
-    # │  Predicted  │      0      │      1      │      2      │
-    # ├─────────────┼─────────────┼─────────────┼─────────────┤
-    # │      0      │      1      │      1      │      2      │
-    # ├─────────────┼─────────────┼─────────────┼─────────────┤
-    # │      1      │      2      │      4      │      4      │
-    # ├─────────────┼─────────────┼─────────────┼─────────────┤
-    # │      2      │      1      │      6      │      8      │
-    # └─────────────┴─────────────┴─────────────┴─────────────┘
-
-    cm_tp   = [1; 4; 8]
-    cm_tn   = [22; 12; 8]
-    cm_fp   = [1+2; 2+4; 1+6]
-    cm_fn   = [2+1; 1+6; 2+4]
-    cm_prec = cm_tp ./ ( cm_tp + cm_fp  )
-    cm_rec  = cm_tp ./ ( cm_tp + cm_fn  )
-
-    # Check if is positive
-    m = MulticlassTruePositive(;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == cm_tp
-    m = MulticlassTrueNegative(;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == cm_tn
-    m = MulticlassFalsePositive(;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == cm_fp
-    m = MulticlassFalseNegative(;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == cm_fn
-
-    # Check if is in [0,1]
-    m = MulticlassTruePositiveRate(average=no_avg;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == cm_tp ./ (cm_fn.+cm_tp) <= [1; 1; 1]
-    m = MulticlassTrueNegativeRate(average=no_avg;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == cm_tn ./ (cm_tn.+cm_fp) <= [1; 1; 1]
-    m = MulticlassFalsePositiveRate(average=no_avg;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == 1 .- cm_tn ./ (cm_tn.+cm_fp) <= [1; 1; 1]
-    m = MulticlassFalseNegativeRate(average=no_avg;return_type=Vector)
-    @test  [0; 0; 0] <= m(ŷ, y) == 1 .- cm_tp ./ (cm_fn.+cm_tp) <= [1; 1; 1]
-
-    #`no_avg` and `LittleDict`
-    @test collect(values(MulticlassPrecision(average=no_avg)(cm))) ≈
-        collect(values(MulticlassPrecision(average=no_avg)(ŷ, y))) ≈
-        cm_prec
-    @test MulticlassPrecision(average=macro_avg)(cm) ≈
-        MulticlassPrecision(average=macro_avg)(ŷ, y) ≈ mean(cm_prec)
-    @test collect(keys(MulticlassPrecision(average=no_avg)(cm)))  ==
-        collect(keys(MulticlassPrecision(average=no_avg)(ŷ, y))) ==
-        ["0"; "1"; "2"]
-    @test collect(values(MulticlassRecall(average=no_avg)(cm))) ≈
-        collect(values(MulticlassRecall(average=no_avg)(ŷ, y))) ≈
-        cm_rec
-    @test collect(values(MulticlassFScore(average=no_avg)(cm))) ≈
-        collect(values(MulticlassFScore(average=no_avg)(ŷ, y))) ≈
-        2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec )
-
-    #`no_avg` and `LittleDict` with class weights
-    @test collect(values(MulticlassPrecision(average=no_avg)(cm, class_w))) ≈
-        collect(values(MulticlassPrecision(average=no_avg)(ŷ, y, class_w))) ≈
-        cm_prec .* [0; 1; 2]
-    @test collect(values(MulticlassRecall(average=no_avg)(cm, class_w))) ≈
-        collect(values(MulticlassRecall(average=no_avg)(ŷ, y, class_w))) ≈
-        cm_rec .* [0; 1; 2]
-    @test collect(values(MulticlassFScore(average=no_avg)(cm, class_w))) ≈
-        collect(values(MulticlassFScore(average=no_avg)(ŷ, y, class_w))) ≈
-        2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0; 1; 2]
-
-    #`macro_avg` and `LittleDict`
-    macro_prec = MulticlassPrecision(average=macro_avg)
-    macro_rec  = MulticlassRecall(average=macro_avg)
-
-    @test macro_prec(cm)    ≈ macro_prec(ŷ, y)    ≈ mean(cm_prec)
-    @test macro_rec(cm)     ≈ macro_rec(ŷ, y)     ≈ mean(cm_rec)
-    @test macro_f1score(cm) ≈ macro_f1score(ŷ, y) ≈ mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ))
-
-    #`micro_avg` and `LittleDict`
-    micro_prec = MulticlassPrecision(average=micro_avg)
-    micro_rec  = MulticlassRecall(average=micro_avg)
-
-    @test micro_prec(cm)    == micro_prec(ŷ, y)    == sum(cm_tp) ./ sum(cm_fp.+cm_tp)
-    @test micro_rec(cm)     == micro_rec(ŷ, y)     == sum(cm_tp) ./ sum(cm_fn.+cm_tp)
-    @test micro_f1score(cm) == micro_f1score(ŷ, y) ==
-    2 ./ ( 1 ./ ( sum(cm_tp) ./ sum(cm_fp.+cm_tp) ) + 1 ./ ( sum(cm_tp) ./ sum(cm_fn.+cm_tp) ) )
-
-    #`no_avg` and `Vector` with class weights
-    vec_precision = MulticlassPrecision(return_type=Vector)
-    vec_recall    = MulticlassRecall(return_type=Vector)
-    vec_f1score   = MulticlassFScore(return_type=Vector)
-
-    @test vec_precision(cm, class_w) ≈ vec_precision(ŷ, y, class_w) ≈
-        mean(cm_prec .* [0; 1; 2])
-    @test vec_recall(cm, class_w)    ≈ vec_recall(ŷ, y, class_w)    ≈
-        mean(cm_rec .* [0; 1; 2])
-    @test vec_f1score(cm, class_w)   ≈ vec_f1score(ŷ, y, class_w)   ≈
-        mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0; 1; 2])
-
-    #`macro_avg` and `Vector`
-    v_ma_prec = MulticlassPrecision(average=macro_avg,
-                                    return_type=Vector)
-    v_ma_rec  = MulticlassRecall(average=macro_avg, return_type=Vector)
-    v_ma_f1   = MulticlassFScore(average=macro_avg, return_type=Vector)
-
-    @test v_ma_prec(cm) ≈ v_ma_prec(ŷ, y) ≈ mean(cm_prec)
-    @test v_ma_rec(cm)  ≈ v_ma_rec(ŷ, y)  ≈ mean(cm_rec)
-    @test v_ma_f1(cm)   ≈ v_ma_f1(ŷ, y)   ≈ mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ))
-
-    #`macro_avg` and `Vector` with class weights
-    @test v_ma_prec(cm, class_w) ≈ v_ma_prec(ŷ, y, class_w) ≈
-        mean(cm_prec .* [0, 1, 2])
-    @test v_ma_rec(cm, class_w)  ≈ v_ma_rec(ŷ, y, class_w)  ≈
-        mean(cm_rec .* [0, 1, 2])
-    @test v_ma_f1(cm, class_w)   ≈ v_ma_f1(ŷ, y, class_w)   ≈
-        mean(2 ./ ( 1 ./ cm_prec + 1 ./ cm_rec ) .* [0, 1, 2])
-
-    #`micro_avg` and `Vector`
-    v_mi_prec = MulticlassPrecision(average=micro_avg, return_type=Vector)
-    v_mi_rec  = MulticlassRecall(average=micro_avg, return_type=Vector)
-    v_mi_f1   = MulticlassFScore(average=micro_avg, return_type=Vector)
-
-    @test v_mi_prec(cm) == v_mi_prec(ŷ, y) == sum(cm_tp) ./ sum(cm_fp.+cm_tp)
-    @test v_mi_rec(cm)  == v_mi_rec(ŷ, y)  == sum(cm_tp) ./ sum(cm_fn.+cm_tp)
-    @test v_mi_f1(cm)   == v_mi_f1(ŷ, y)   ==
-    2 ./ ( 1 ./ ( sum(cm_tp) ./ sum(cm_fp.+cm_tp) ) + 1 ./ ( sum(cm_tp) ./ sum(cm_fn.+cm_tp) ) )
-end
-
-@testset "issue #630" begin
-    # multiclass fscore corner case of absent class
-
-    y = coerce([1, 2, 2, 2, 3], OrderedFactor)[1:4]
-    # [1, 2, 2, 2] # but 3 is in the pool
-    yhat = reverse(y)
-    # [2, 2, 2, 1]
-
-    # In this case, assigning "3" as "positive" gives all true negative,
-    # and so NaN for that class's contribution to the average F1Score,
-    # which should accordingly be skipped.
-
-    # postive class | TP | FP | FN | score for that class
-    # --------------|----|----|----|---------------------
-    #  1            | 0  | 1  | 2  | 0
-    #  2            | 2  | 1  | 1  | 2/3
-    #  3            | 0  | 0  | 0  | NaN
-
-    # mean score with skippin NaN is 1/3
-    @test MulticlassFScore()(yhat, y) ≈ 1/3
-end
-
-@testset "Metadata binary" begin
-    for m in (accuracy, recall, Precision(), f1score, specificity)
-        e = info(m)
-        m == accuracy    && (@test e.name == "Accuracy")
-        m == recall      && (@test e.name == "TruePositiveRate")
-        m isa Precision  && (@test e.name == "Precision")
-        m == f1score     && (@test e.name == "FScore")
-        m == specificity && (@test e.name == "TrueNegativeRate")
-        @test e.target_scitype <: AbstractArray{<:Union{Missing,Finite}}
-        @test e.prediction_type == :deterministic
-        @test e.orientation == :score
-        @test e.reports_each_observation == false
-        @test e.is_feature_dependent == false
-        if m == accuracy
-            @test e.supports_weights
-        else
-            @test !e.supports_weights
-        end
-    end
-    e = info(auc)
-    @test e.name == "AreaUnderCurve"
-    @test e.target_scitype ==
-        Union{AbstractArray{<:Union{Missing,Multiclass{2}}},
-              AbstractArray{<:Union{Missing,OrderedFactor{2}}}}
-    @test e.prediction_type == :probabilistic
-    @test e.reports_each_observation == false
-    @test e.is_feature_dependent == false
-    @test e.supports_weights == false
-end
-
-@testset "Metadata multiclass" begin
-    for m in (MulticlassRecall(), MulticlassPrecision(),
-              MulticlassFScore(), MulticlassTrueNegativeRate())
-        e = info(m)
-        m isa MulticlassRecall &&
-            (@test e.name == "MulticlassTruePositiveRate")
-        m isa MulticlassPrecision   &&
-            (@test e.name == "MulticlassPrecision")
-        m isa MulticlassFScore &&
-            (@test e.name == "MulticlassFScore")
-        m isa MulticlassTrueNegativeRate &&
-            (@test e.name == "MulticlassTrueNegativeRate")
-        @test e.target_scitype <: AbstractArray{<:Union{Missing,Finite}}
-        @test e.prediction_type == :deterministic
-        @test e.orientation == :score
-        @test e.reports_each_observation == false
-        @test e.is_feature_dependent == false
-        @test e.supports_weights == false
-        @test e.supports_class_weights == true
-    end
-end
-
-@testset "More binary metrics" begin
-    y = coerce([missing, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2,
-                2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1,
-                2, 2, 2, 1], Union{Missing,OrderedFactor})
-    ŷ = coerce([1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2,
-                1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2,
-                1, 2, 2, missing], Union{Missing,OrderedFactor})
-
-    # check all constructors
-    m = TruePositive()
-    @test m(ŷ, y) == truepositive(ŷ, y)
-    m = TruePositive(rev=true)
-    @test m(ŷ, y) == truenegative(ŷ, y)
-    m = TrueNegative()
-    @test m(ŷ, y) == truenegative(ŷ, y)
-    m = FalsePositive()
-    @test m(ŷ, y) == falsepositive(ŷ, y)
-    m = FalseNegative()
-    @test m(ŷ, y) == falsenegative(ŷ, y)
-    m = TruePositiveRate()
-    @test m(ŷ, y) == tpr(ŷ, y) == truepositive_rate(ŷ, y)
-    m = TrueNegativeRate()
-    @test m(ŷ, y) == tnr(ŷ, y) == truenegative_rate(ŷ, y)
-    m = FalsePositiveRate()
-    @test m(ŷ, y) == fpr(ŷ, y) == falsepositive_rate(ŷ, y)
-    m = FalseNegativeRate()
-    @test m(ŷ, y) == fnr(ŷ, y) == falsenegative_rate(ŷ, y)
-    m = FalseDiscoveryRate()
-    @test m(ŷ, y) == fdr(ŷ, y) == falsediscovery_rate(ŷ, y)
-    m = Precision()
-    @test m(ŷ, y) == precision(ŷ, y)
-    m = NPV()
-    @test m(ŷ, y) == npv(ŷ, y)
-    m = FScore()
-    @test m(ŷ, y) == f1score(ŷ, y)
-    # check synonyms
-    m = TPR()
-    @test m(ŷ, y) == tpr(ŷ, y)
-    m = TNR()
-    @test m(ŷ, y) == tnr(ŷ, y)
-    m = FPR()
-    @test m(ŷ, y) == fpr(ŷ, y) == fallout(ŷ, y)
-    m = FNR()
-    @test m(ŷ, y) == fnr(ŷ, y) == miss_rate(ŷ, y)
-    m = FDR()
-    @test m(ŷ, y) == fdr(ŷ, y)
-    m = PPV()
-    @test m(ŷ, y) == precision(ŷ, y) == ppv(ŷ, y)
-    m = Recall()
-    @test m(ŷ, y) == tpr(ŷ, y) == recall(ŷ, y) ==
-        sensitivity(ŷ, y) == hit_rate(ŷ, y)
-    m = Specificity()
-    @test m(ŷ, y) == tnr(ŷ, y) == specificity(ŷ, y) == selectivity(ŷ, y)
-    # 'higher order'
-    m = BACC()
-    @test m(ŷ, y) == bacc(ŷ, y) == (tpr(ŷ, y) + tnr(ŷ, y))/2
-
-    ### External comparisons
-    sk_prec = 0.6111111111111112 # m.precision_score(y, yhat, pos_label=2)
-    @test precision(ŷ, y) ≈ sk_prec
-    sk_rec = 0.6875
-    @test recall(ŷ, y) == sk_rec # m.recall_score(y, yhat, pos_label=2)
-    sk_f05 = 0.625
-    f05 = FScore(β=0.5)
-    @test f05(ŷ, y) ≈ sk_f05 # m.fbeta_score(y, yhat, 0.5, pos_label=2)
-
-    # reversion mechanism
-    sk_prec_rev = 0.5454545454545454
-    prec_rev = Precision(rev=true)
-    @test prec_rev(ŷ, y) ≈ sk_prec_rev
-    sk_rec_rev = 0.46153846153846156
-    rec_rev = Recall(rev=true)
-    @test rec_rev(ŷ, y) ≈ sk_rec_rev
-end
-
-@testset "More multiclass metrics" begin
-    y = coerce(categorical([missing, 1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 2, 1, 2,
-                            2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1,
-                            2, 2, 2, 0]), Union{Missing,Multiclass})
-    ŷ = coerce(categorical([0, 2, 0, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 2,
-                            1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 1, 2,
-                            1, 2, 2, missing]), Union{Missing,Multiclass})
-    w = Dict(0=>1, 1=>2, 2=>3) #class_w
-    # check all constructors
-    m = MulticlassTruePositive()
-    @test m(ŷ, y) == multiclass_truepositive(ŷ, y)
-    m = MulticlassTrueNegative()
-    @test m(ŷ, y) == multiclass_truenegative(ŷ, y)
-    m = MulticlassFalsePositive()
-    @test m(ŷ, y) == multiclass_falsepositive(ŷ, y)
-    m = MulticlassFalseNegative()
-    @test m(ŷ, y) == multiclass_falsenegative(ŷ, y)
-    m = MulticlassTruePositiveRate()
-    @test m(ŷ, y) == multiclass_tpr(ŷ, y) ==
-        multiclass_truepositive_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w) ==
-        multiclass_truepositive_rate(ŷ, y, w)
-    m = MulticlassTrueNegativeRate()
-    @test m(ŷ, y) == multiclass_tnr(ŷ, y) ==
-        multiclass_truenegative_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) ==
-        multiclass_truenegative_rate(ŷ, y, w)
-    m = MulticlassFalsePositiveRate()
-    @test m(ŷ, y) == multiclass_fpr(ŷ, y) ==
-        multiclass_falsepositive_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_fpr(ŷ, y, w) ==
-        multiclass_falsepositive_rate(ŷ, y, w)
-    m = MulticlassFalseNegativeRate()
-    @test m(ŷ, y) == multiclass_fnr(ŷ, y) ==
-        multiclass_falsenegative_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_fnr(ŷ, y, w) ==
-        multiclass_falsenegative_rate(ŷ, y, w)
-    m = MulticlassFalseDiscoveryRate()
-    @test m(ŷ, y) == multiclass_fdr(ŷ, y) ==
-        multiclass_falsediscovery_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_fdr(ŷ, y, w) ==
-        multiclass_falsediscovery_rate(ŷ, y, w)
-    m = MulticlassPrecision()
-    @test m(ŷ, y) == multiclass_precision(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_precision(ŷ, y, w)
-    m = MulticlassNegativePredictiveValue()
-    @test m(ŷ, y) == multiclass_npv(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_npv(ŷ, y, w)
-    m = MulticlassFScore()
-    @test m(ŷ, y) == macro_f1score(ŷ, y)
-    @test m(ŷ, y, w) == macro_f1score(ŷ, y, w)
-    # check synonyms
-    m = MTPR(return_type=Vector)
-    @test m(ŷ, y) == multiclass_tpr(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w)
-    m = MTNR(return_type=Vector)
-    @test m(ŷ, y) == multiclass_tnr(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w)
-    m = MFPR()
-    @test m(ŷ, y) == multiclass_fpr(ŷ, y) == multiclass_fallout(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_fpr(ŷ, y, w) ==
-        multiclass_fallout(ŷ, y, w)
-    m = MFNR()
-    @test m(ŷ, y) == multiclass_fnr(ŷ, y) ==
-        multiclass_miss_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_fnr(ŷ, y, w) ==
-        multiclass_miss_rate(ŷ, y, w)
-    m = MFDR()
-    @test m(ŷ, y) == multiclass_fdr(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_fdr(ŷ, y, w)
-    m = MPPV()
-    @test m(ŷ, y) == MulticlassPrecision()(ŷ, y) ==
-        multiclass_ppv(ŷ, y)
-    @test m(ŷ, y, w) == MulticlassPrecision()(ŷ, y, w) ==
-        multiclass_ppv(ŷ, y, w)
-    m = MulticlassRecall()
-    @test m(ŷ, y) == multiclass_tpr(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_tpr(ŷ, y, w)
-    @test m(ŷ, y) == multiclass_sensitivity(ŷ, y) ==
-        multiclass_hit_rate(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_sensitivity(ŷ, y, w) ==
-        multiclass_hit_rate(ŷ, y, w)
-    m = MulticlassSpecificity()
-    @test m(ŷ, y) == multiclass_tnr(ŷ, y) == multiclass_specificity(ŷ, y) ==
-        multiclass_selectivity(ŷ, y)
-    @test m(ŷ, y, w) == multiclass_tnr(ŷ, y, w) ==
-        multiclass_specificity(ŷ, y, w) == multiclass_selectivity(ŷ, y, w)
-end
-
-
-@testset "Additional multiclass tests" begin
-    table = reshape(collect("aabbbccccddbabccbacccd"), 11, 2)
-    table = coerce(table, Multiclass);
-    yhat = table[:,1] # ['a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd']
-    y    = table[:,2] # ['b', 'a', 'b', 'c', 'c', 'b', 'a', 'c', 'c', 'c', 'd']
-    class_w = Dict('a'=>7, 'b'=>5, 'c'=>2, 'd'=> 0)
-
-    # class | TP | FP | TP + FP | precision | FN | TP + FN | recall
-    # ------|----|----|------------------------------------|-------
-    # a     | 1  | 1  | 2       | 1/2       | 1  | 2       | 1/2
-    # b     | 1  | 2  | 3       | 1/3       | 2  | 3       | 1/3
-    # c     | 2  | 2  | 4       | 1/2       | 3  | 5       | 2/5
-    # d     | 1  | 1  | 2       | 1/2       | 0  | 1       | 1
-
-    # helper:
-    inverse(x) = 1/x
-    harmonic_mean(x, y; beta=1.0) =
-        (1 + inverse(beta^2))*inverse(mean(inverse(beta^2*x)+ inverse(y)))
-
-    # precision:
-    p_macro = mean([1/2, 1/3, 1/2, 1/2])
-    @test MulticlassPrecision()(yhat, y) ≈ p_macro
-    p_macro_w = mean([7/2, 5/3, 2/2, 0/2])
-    @test MulticlassPrecision()(yhat, y, class_w) ≈ p_macro_w
-    @test p_macro_w ≈
-        @test_logs((:warn, r"Using macro"),
-                     MulticlassPrecision(average=micro_avg)(yhat, y, class_w))
-    p_micro = (1 + 1 + 2 + 1)/(2 + 3 + 4 + 2)
-    @test MulticlassPrecision(average=micro_avg)(yhat, y) ≈ p_micro
-
-    # recall:
-    r_macro = mean([1/2, 1/3, 2/5, 1])
-    @test MulticlassRecall(average=macro_avg)(yhat, y) ≈ r_macro
-    r_macro_w = mean([7/2, 5/3, 4/5, 0/1])
-    @test MulticlassRecall(average=macro_avg)(yhat, y, class_w) ≈ r_macro_w
-    @test r_macro_w ≈
-        @test_logs((:warn, r"Using macro"),
-                     MulticlassRecall(average=micro_avg)(yhat, y, class_w))
-    r_micro = (1 + 1 + 2 + 1)/(2 + 3 + 5 + 1)
-    @test MulticlassPrecision(average=micro_avg)(yhat, y) ≈ r_micro
-
-    # fscore:
-    harm_means = [harmonic_mean(1/2, 1/2),
-                     harmonic_mean(1/3, 1/3),
-                     harmonic_mean(1/2, 2/5),
-                     harmonic_mean(1/2, 1)]
-    f1_macro = mean(harm_means)
-    @test MulticlassFScore(average=macro_avg)(yhat, y) ≈ f1_macro
-    @test MulticlassFScore(average=no_avg,
-                           return_type=Vector)(yhat, y, class_w) ≈
-        [7, 5, 2, 0] .* harm_means
-    f1_macro_w = mean([7, 5, 2, 0] .* harm_means)
-    @test MulticlassFScore(average=macro_avg)(yhat, y, class_w) ≈ f1_macro_w
-    @test f1_macro_w ≈
-        @test_logs((:warn, r"Using macro"),
-                     MulticlassFScore(average=micro_avg)(yhat, y, class_w))
-    f1_micro = harmonic_mean(p_micro, r_micro)
-    @test MulticlassFScore(average=micro_avg)(yhat, y) ≈ f1_micro
-
-    # fscore, β=1/3:
-    harm_means = [harmonic_mean(1/2, 1/2, beta=1/3),
-                     harmonic_mean(1/3, 1/3, beta=1/3),
-                     harmonic_mean(1/2, 2/5, beta=1/3),
-                     harmonic_mean(1/2, 1, beta=1/3)]
-    f1_macro = mean(harm_means)
-    @test MulticlassFScore(β=1/3, average=macro_avg)(yhat, y) ≈ f1_macro
-    @test MulticlassFScore(β=1/3,
-                           average=no_avg,
-                           return_type=Vector)(yhat, y, class_w) ≈
-        [7, 5, 2, 0] .* harm_means
-    f1_macro_w = mean([7, 5, 2, 0] .* harm_means)
-    @test MulticlassFScore(β=1/3,
-                           average=macro_avg)(yhat, y, class_w) ≈ f1_macro_w
-    @test f1_macro_w ≈
-        @test_logs((:warn, r"Using macro"),
-                   MulticlassFScore(β=1/3,
-                                    average=micro_avg)(yhat, y, class_w))
-    f1_micro = harmonic_mean(p_micro, r_micro, beta=1/3)
-    @test MulticlassFScore(β=1/3, average=micro_avg)(yhat, y) ≈ f1_micro
-end
-
-@testset "docstrings coverage" begin
-    @test startswith(info(BrierScore()).docstring, "`BrierScore`")
-end
diff --git a/test/measures/loss_functions_interface.jl b/test/measures/loss_functions_interface.jl
deleted file mode 100644
index 8c59945b..00000000
--- a/test/measures/loss_functions_interface.jl
+++ /dev/null
@@ -1,68 +0,0 @@
-rng = StableRNG(614)
-
-# convert a Binary vector into vector of +1 or -1 values
-# (for testing only):
-pm1(y) = Int8(2) .* (Int8.(MLJBase.int(y))) .- Int8(3)
-
-const MARGIN_LOSSES = MLJBase.MARGIN_LOSSES
-const DISTANCE_LOSSES = MLJBase.DISTANCE_LOSSES
-
-# using `WeightedSum` instead of `WeightedMean`; see
-# https://github.com/JuliaML/LossFunctions.jl/issues/149
-WeightedSum(w) = LossFunctions.AggMode.WeightedMean(w, normalize=false)
-
-@testset "naked" begin
-    @test MLJBase.naked(MLJBase.LossFunctions.PeriodicLoss{Float64}) ==
-        :PeriodicLoss
-    @test MLJBase.naked(MLJBase.LossFunctions.PeriodicLoss) ==
-        :PeriodicLoss
-end
-
-@testset "LossFunctions.jl - binary" begin
-    y = categorical(["yes", "yes", "no", "yes"])
-    yes, no = y[1], y[3]
-    dyes = MLJBase.UnivariateFinite([yes, no], [0.6, 0.4])
-    dno =  MLJBase.UnivariateFinite([yes, no], [0.3, 0.7])
-    yhat = [dno, dno, dyes, dyes]
-    w = [1, 2, 3, 4]
-
-    @test MLJBase.ZeroOneLoss()(yhat, y) ≈ [1, 1, 1, 0]
-    @test MLJBase.zero_one_loss(yhat,y, w) ≈ [1, 2, 3, 0]
-
-    N = 10
-    y = categorical(rand(rng, ["yes", "no"], N), ordered=true)
-    levels!(y, ["no", "yes"])
-    no, yes = MLJBase.classes(y[1])
-    @test pm1([yes, no]) in [[+1, -1], [-1, +1]]
-    ym = pm1(y) # observations for raw LossFunctions measure
-    p_vec = rand(N)
-    yhat = MLJBase.UnivariateFinite([no, yes], p_vec, augment=true)
-    yhatm = MLJBase._scale.(p_vec) # predictions for raw LossFunctions measure
-    w = rand(rng, N)
-
-    for M_ex in MARGIN_LOSSES
-        m = eval(:(MLJBase.$M_ex()))
-        @test m(yhat, y) ≈ (getfield(m, :loss)).(yhatm, ym)
-        @test m(yhat, y, w) ≈
-            w .* (getfield(m, :loss)).(yhatm, ym)
-    end
-end
-
-@testset "LossFunctions.jl - continuous" begin
-    # losses for continuous targets:
-    N    = 10
-    y    = randn(rng, N)
-    yhat = randn(rng, N)
-    X    = nothing
-    w    = rand(rng, N)
-
-    for M_ex in DISTANCE_LOSSES
-        m = eval(:(MLJBase.$M_ex()))
-        m_ex = MLJBase.snakecase(M_ex)
-        @test m == eval(:(MLJBase.$m_ex))
-        @test m(yhat, y) ≈
-            (getfield(m, :loss)).(yhat, y)
-        @test m(yhat ,y, w) ≈
-            w .* (getfield(m, :loss)).(yhat, y)
-    end
-end
diff --git a/test/measures/measure_search.jl b/test/measures/measure_search.jl
deleted file mode 100644
index f8aa5e4d..00000000
--- a/test/measures/measure_search.jl
+++ /dev/null
@@ -1,42 +0,0 @@
-ms = map(measures()) do m
-    m.name
-end
-@test "LogLoss" in ms
-@test "RootMeanSquaredError"  in ms
-
-# test `M()` makes sense for all measure types `M` extracted from `name`,
-@test all(Symbol.(ms)) do ex
-    try
-        eval(:($ex()))
-        true
-    catch
-        false
-    end
-end
-
-S = AbstractVector{Union{Missing,Multiclass{3}}}
-task(m) = S <: m.target_scitype
-
-ms = map(measures(task)) do m
-    m.name
-end
-
-@test "LogLoss" in ms
-@test !("RootMeanSquaredError"  in ms)
-
-task(m) = AbstractVector{Continuous} <: m.target_scitype
-
-ms = map(measures(task)) do m
-    m.name
-end
-
-@test !("Accuracy" in ms)
-@test "RootMeanSquaredError"  in ms
-
-ms = map(measures("Brier")) do  m
-    m.name
-end
-
-@test Set(ms) == Set(["BrierLoss", "BrierScore"])
-
-true
diff --git a/test/measures/measures.jl b/test/measures/measures.jl
deleted file mode 100644
index 602c3e78..00000000
--- a/test/measures/measures.jl
+++ /dev/null
@@ -1,134 +0,0 @@
-module TestMeasures
-
-using MLJBase, Test
-import Distributions
-using CategoricalArrays
-using Statistics
-import LossFunctions
-using StableRNGs
-using OrderedCollections: LittleDict
-
-rng  = StableRNGs.StableRNG(123)
-
-@testset "aggregation" begin
-    v = rand(5)
-    @test aggregate(v, mae) ≈ mean(v)
-    @test aggregate(v, TruePositive()) ≈ sum(v)
-    @test aggregate(v, rms) ≈ sqrt(mean(v.^2))
-    λ = rand()
-    @test aggregate(λ, rms) === λ
-    @test aggregate(aggregate(v, l2), l2) == aggregate(v, l2)
-    m = LittleDict([0, 1, 2, 3, 4], v)
-    @test aggregate(m, MTPR()) == mean(v)
-end
-
-@testset "metadata" begin
-    measures()
-    measures(m -> m.target_scitype <: AbstractVector{<:Finite} &&
-             m.supports_weights)
-    info(rms)
-    @test true
-end
-
-@testset "coverage" begin
-    # just checking that the traits work not that they're correct
-    @test orientation(BrierScore()) == :score
-    @test orientation(auc) == :score
-    @test orientation(rms) == :loss
-
-    @test reports_each_observation(auc) == false
-    @test is_feature_dependent(auc) == false
-
-    @test MLJBase.distribution_type(auc) == MLJBase.UnivariateFinite
-end
-
-@testset "MLJBase.value" begin
-    yhat = randn(rng,5)
-    X = (weight=randn(rng,5), x1 = randn(rng,5))
-    y = randn(rng,5)
-    w = randn(rng,5)
-
-    @test MLJBase.value(mae, yhat, nothing, y, nothing) ≈ mae(yhat, y)
-    @test MLJBase.value(mae, yhat, nothing, y, w) ≈ mae(yhat, y, w)
-
-    spooky(yhat, y) = abs.(yhat - y) |> mean
-    @test MLJBase.value(spooky, yhat, nothing, y, nothing) ≈ mae(yhat, y)
-
-    cool(yhat, y, w) = abs.(yhat - y) .* w |> mean
-    MLJBase.supports_weights(::Type{typeof(cool)}) = true
-    @test MLJBase.value(cool, yhat, nothing, y, w) ≈ mae(yhat, y, w)
-
-    funky(yhat, X, y) = X.weight .* abs.(yhat - y) |> mean
-    MLJBase.is_feature_dependent(::Type{typeof(funky)}) = true
-    @test MLJBase.value(funky, yhat, X, y, nothing) ≈ mae(yhat, y, X.weight)
-
-    weird(yhat, X, y, w) = w .* X.weight .* abs.(yhat - y) |> mean
-    MLJBase.is_feature_dependent(::Type{typeof(weird)}) = true
-    MLJBase.supports_weights(::Type{typeof(weird)}) = true
-    @test MLJBase.value(weird, yhat, X, y, w) ≈ mae(yhat, y, X.weight .* w)
-end
-
-mutable struct DRegressor <: Deterministic end
-MLJBase.target_scitype(::Type{<:DRegressor}) =
-    AbstractVector{<:Continuous}
-
-mutable struct D2Regressor <: Deterministic end
-MLJBase.target_scitype(::Type{<:D2Regressor}) =
-    AbstractVector{Continuous}
-
-mutable struct DClassifier <: Deterministic end
-MLJBase.target_scitype(::Type{<:DClassifier}) =
-    AbstractVector{<:Finite}
-
-mutable struct PClassifier <: Probabilistic end
-MLJBase.target_scitype(::Type{<:PClassifier}) =
-    AbstractVector{<:Finite}
-
-mutable struct PRegressor <: Probabilistic end
-MLJBase.target_scitype(::Type{<:PRegressor}) =
-    AbstractVector{<:Continuous}
-
-mutable struct PCountRegressor <: Probabilistic end
-MLJBase.target_scitype(::Type{<:PCountRegressor}) =
-    AbstractVector{<:Count}
-
-@testset "default_measure" begin
-    @test MLJBase.default_measure(DRegressor()) == rms
-    @test MLJBase.default_measure(D2Regressor()) == rms
-    @test MLJBase.default_measure(DClassifier()) == misclassification_rate
-    @test MLJBase.default_measure(PClassifier()) == log_loss
-
-    @test MLJBase.default_measure(DRegressor) == rms
-    @test MLJBase.default_measure(D2Regressor) == rms
-    @test MLJBase.default_measure(DClassifier) == misclassification_rate
-    @test MLJBase.default_measure(PClassifier) == log_loss
-
-    @test MLJBase.default_measure(PRegressor) == log_loss
-    @test MLJBase.default_measure(PCountRegressor) == log_loss
-end
-
-include("confusion_matrix.jl")
-include("roc.jl")
-include("continuous.jl")
-include("finite.jl")
-include("probabilistic.jl")
-include("loss_functions_interface.jl")
-
-@testset "show method for measures" begin
-    io = IOBuffer()
-    for meta in measures()
-        m = eval(Meta.parse("$(meta.name)()"))
-        show(io, MIME("text/plain"), m)
-        show(io, m)
-    end
-end
-
-@testset "missing and NaN values in aggregation" begin
-    v =[1, 2, missing, 5, NaN]
-    @test MLJBase.Sum()(v) == 8
-    @test MLJBase.RootMeanSquare()(v) ≈ sqrt((1 + 4 + 25)/3)
-    @test MLJBase.Mean()(Union{Missing,Float32}[]) |> isnan
-end
-
-end
-true
diff --git a/test/measures/probabilistic.jl b/test/measures/probabilistic.jl
deleted file mode 100644
index 733c0d20..00000000
--- a/test/measures/probabilistic.jl
+++ /dev/null
@@ -1,174 +0,0 @@
-rng = StableRNG(51803)
-using LinearAlgebra
-
-const Vec = AbstractVector
-
-@testset "AUC" begin
-    # this is random binary and random scores generated with numpy
-    # then using roc_auc_score from sklearn to get the AUC
-    # we check that we recover a comparable AUC and that it's invariant
-    # to ordering.
-    c = ["neg", "pos"]
-    y = categorical(c[[0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
-                     1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1,
-                     1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
-                     1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
-                     1, 0] .+ 1])
-    probs = [
-        0.90237535, 0.41276349, 0.94511611, 0.08390761, 0.55847392,
-        0.26043136, 0.78565351, 0.20133953, 0.7404382 , 0.15307601,
-        0.59596716, 0.8169512 , 0.88200483, 0.23321489, 0.94050483,
-        0.27593662, 0.60702176, 0.36427036, 0.35481784, 0.06416543,
-        0.45576954, 0.12354048, 0.79830435, 0.15799818, 0.20981099,
-        0.43451663, 0.24020098, 0.11401055, 0.25785748, 0.86490263,
-        0.75715379, 0.06550534, 0.12628999, 0.18878245, 0.1283757 ,
-        0.76542903, 0.8780248 , 0.86891113, 0.24835709, 0.06528076,
-        0.72061354, 0.89451634, 0.95634394, 0.07555979, 0.16345437,
-        0.43498831, 0.37774708, 0.31608861, 0.41369339, 0.95691113]
-
-    ŷ = UnivariateFinite(y[1:2], probs, augment=true)
-    # ŷ = [UnivariateFinite(y[1:2], [1.0 - p, p]) for p in [
-    #     0.90237535, 0.41276349, 0.94511611, 0.08390761, 0.55847392,
-    #     0.26043136, 0.78565351, 0.20133953, 0.7404382 , 0.15307601,
-    #     0.59596716, 0.8169512 , 0.88200483, 0.23321489, 0.94050483,
-    #     0.27593662, 0.60702176, 0.36427036, 0.35481784, 0.06416543,
-    #     0.45576954, 0.12354048, 0.79830435, 0.15799818, 0.20981099,
-    #     0.43451663, 0.24020098, 0.11401055, 0.25785748, 0.86490263,
-    #     0.75715379, 0.06550534, 0.12628999, 0.18878245, 0.1283757 ,
-    #     0.76542903, 0.8780248 , 0.86891113, 0.24835709, 0.06528076,
-    #     0.72061354, 0.89451634, 0.95634394, 0.07555979, 0.16345437,
-    #     0.43498831, 0.37774708, 0.31608861, 0.41369339, 0.95691113]]
-    @test isapprox(auc(ŷ, y), 0.455716, rtol=1e-4)
-    ŷ_unwrapped = [ŷ...]
-    @test isapprox(auc(ŷ_unwrapped, y), 0.455716, rtol=1e-4)
-
-    # reversing the roles of positive and negative should return very
-    # similar score
-    y2 = deepcopy(y);
-    levels!(y2, reverse(levels(y2)));
-    @test y == y2
-    @test levels(y) != levels(y2)
-    ŷ2 = UnivariateFinite(y2[1:2], probs, augment=true) # same probs
-    @test isapprox(auc(ŷ2, y2), auc(ŷ, y), rtol=1e-4)
-
-    # The auc algorithm should be able to handle the case where two or more
-    # samples in the prediction vector has the same UnivariateFinite distribution
-    # We check this by comparing our auc with that gotten from roc_auc_score from sklearn.
-    y = categorical(["class_1","class_1","class_0","class_0","class_1","class_1","class_0"])
-    ŷ = UnivariateFinite(levels(y), [0.8,0.7,0.5,0.5,0.5,0.5,0.3], augment=true, pool=y)
-    # We can see that ŷ[3] ≈ ŷ[4] ≈ ŷ[5] ≈ ŷ[6]
-    @test isapprox(auc(ŷ, y), 0.8333333333333334, rtol=1e-16)
-end
-
-@testset "Log, Brier, Spherical - finite case" begin
-    y = categorical(collect("abb"))
-    L = [y[1], y[2]]
-    d1 = UnivariateFinite(L, [0.1, 0.9]) # a
-    d2 = UnivariateFinite(L, Float32[0.4, 0.6]) # b
-    d3 = UnivariateFinite(L, [0.2, 0.8]) # b
-    yhat = [d1, d2, d3]
-    ym = vcat(y, [missing,])
-    yhatm = vcat(yhat, [d3, ])
-
-    @test mean(log_loss(yhat, y)) ≈
-        Float32(-(log(0.1) + log(0.6) + log(0.8))/3)
-    @test mean(skipmissing(log_loss(yhatm, ym))) ≈
-        Float32(-(log(0.1) + log(0.6) + log(0.8))/3)
-    yhat = UnivariateFinite(L, [0.1 0.9;
-                                0.4 0.6;
-                                0.2 0.8])
-    @test isapprox(mean(log_loss(yhat, y)),
-                   -(log(0.1) + log(0.6) + log(0.8))/3, atol=eps(Float32))
-
-    @test log_score(yhat, y) ≈ -log_loss(yhat, y)
-
-    # sklearn test
-    # >>> from sklearn.metrics import log_loss
-    # >>> log_loss(["spam", "ham", "ham", "spam","ham","ham"],
-    #    [[.1, .9], [.9, .1], [.8, .2], [.35, .65], [0.2, 0.8], [0.3,0.7]])
-    # 0.6130097025803921
-    y2 = categorical(["spam", "ham", "ham", "spam", "ham", "ham"])
-    L2 = classes(y2[1])
-    probs = vcat([.1 .9], [.9 .1], [.8 .2], [.35 .65], [0.2 0.8], [0.3 0.7])
-    yhat2 = UnivariateFinite(L2, probs)
-    y2m = vcat(y2, [missing,])
-    yhat2m = UnivariateFinite(L2, vcat(probs, [0.1 0.9]))
-    @test mean(log_loss(yhat2, y2)) ≈ 0.6130097025803921
-    @test mean(skipmissing(log_loss(yhat2, y2))) ≈ 0.6130097025803921
-
-    ## Brier
-    scores = BrierScore()(yhat, y)
-    @test size(scores) == size(y)
-    @test Float32.(scores) ≈ [-1.62, -0.32, -0.08]
-    scoresm = BrierScore()(yhatm, ym)
-    @test Float32.((scoresm)[1:3]) ≈ [-1.62, -0.32, -0.08]
-    @test ismissing(scoresm[end])
-    # test specialized broadcasting on brierloss
-    @test BrierLoss()(yhat, y) == -BrierScore()(yhat, y) 
-    # sklearn test
-    # >>> from sklearn.metrics import brier_score_loss
-    # >>> brier_score_loss([1, 0, 0, 1, 0, 0], [.9, .1, .2, .65, 0.8, 0.7])
-    # 0.21875 NOTE: opposite orientation
-    @test -mean(BrierScore()(yhat2, y2)) / 2 ≈ 0.21875
-    probs2 = [[.1, .9], [Float32(0.9), Float32(1) - Float32(0.9)], [.8, .2],
-              [.35, .65], [0.2, 0.8], [0.3, 0.7]]
-    yhat3 = [UnivariateFinite(L2, prob) for prob in probs2]
-    @test -mean(BrierScore()(yhat3, y2) / 2) ≈ 0.21875
-    @test mean(BrierLoss()(yhat3, y2) / 2) ≈ -mean(BrierScore()(yhat3, y2) / 2)
-
-    # Spherical
-    s = SphericalScore() # SphericalScore(2)
-    norms = [norm(probs[i,:]) for i in 1:size(probs, 1)]
-    @test (pdf.(yhat2, y2) ./ norms) ≈  s(yhat2, y2)
-    # non-performant version:
-    yhat4 = [yhat2...]
-    @test (pdf.(yhat2, y2) ./ norms) ≈  s(yhat4, y2)
-end
-
-@testset "LogScore, BrierScore, SphericalScore - infinite case" begin
-    uniform = Distributions.Uniform(2, 5)
-    betaprime = Distributions.BetaPrime()
-    discrete_uniform = Distributions.DiscreteUniform(2, 5)
-    w = [2, 3]
-
-    # brier
-    yhat = [missing, uniform]
-    @test isapprox(brier_score(yhat, [1.0, 1.0]) |> last, -1/3)
-    @test isapprox(brier_score(yhat, [NaN, 4.0]) |> last,  1/3)
-    @test isapprox(brier_score(yhat, [1.0, 1.0], w) |> last, -1)
-    yhat = [missing, uniform]
-    # issue https://github.com/JuliaStats/Distributions.jl/issues/1392
-    @test_broken isapprox(brier_score(yhat, [missing, 4.0], w), [1,])
-    yhat = [discrete_uniform, discrete_uniform]
-    @test isapprox(brier_score(yhat, [NaN, 1.0]), [-1/4, -1/4,])
-    @test isapprox(brier_score(yhat, [4.0, 4.0]), [1/4, 1/4,])
-
-    # spherical
-    yhat = [uniform, uniform]
-    @test isapprox(spherical_score(yhat, [1.0, 1.0]), [0, 0])
-    @test isapprox(spherical_score(yhat, [NaN, 4.0]), [0, 1/sqrt(3),])
-    # issue https://github.com/JuliaStats/Distributions.jl/issues/1392
-    @test_broken isapprox(spherical_score(yhat, [missing, 4.0], w), [sqrt(3),])
-    @test isapprox(spherical_score(yhat, [4.0, 4.0], w), [2/sqrt(3), sqrt(3),])
-    yhat = [discrete_uniform, discrete_uniform]
-    @test isapprox(spherical_score(yhat, [NaN, 1.0]), [0, 0])
-    @test isapprox(spherical_score(yhat, [4.0, 4.0]), [1/2, 1/2])
-
-    # log
-    yhat = [uniform, uniform]
-    @test isapprox(log_score(yhat, [4.0, 4.0]), [-log(3), -log(3),])
-    @test isapprox(log_score(yhat, [4.0, 4.0], w), [-2*log(27)/3, -log(27)])
-    yhat = [discrete_uniform, discrete_uniform]
-    # issue https://github.com/JuliaStats/Distributions.jl/issues/1392
-    @test_broken  isapprox(log_score(yhat, [missing, 4.0]), [-log(4),])
-
-    log_score([missing, uniform], [4.0, 4.0])
-
-    # errors
-    @test_throws(MLJBase.err_l2_norm(brier_score),
-                 brier_score([betaprime, betaprime], [1.0, 1.0]))
-    s = SphericalScore(alpha=1)
-    @test_throws MLJBase.ERR_UNSUPPORTED_ALPHA s(yhat, [1.0, 1.0])
-end
-
-true
diff --git a/test/measures/roc.jl b/test/measures/roc.jl
deleted file mode 100644
index aaaed8b7..00000000
--- a/test/measures/roc.jl
+++ /dev/null
@@ -1,13 +0,0 @@
-@testset "ROC" begin
-    y = [  0   0   0   1   0   1   1   0] |> vec |> categorical
-    s = [0.0 0.1 0.1 0.1 0.2 0.2 0.5 0.5] |> vec
-    ŷ = UnivariateFinite([0, 1], s, augment=true, pool=y)
-
-    fprs, tprs, ts = roc(ŷ, y)
-
-    sk_fprs = [0. , 0.2, 0.4, 0.8, 1. ]
-    sk_tprs = [0. , 0.33333333, 0.66666667, 1., 1.]
-
-    @test fprs ≈ sk_fprs
-    @test tprs ≈ sk_tprs
-end
diff --git a/test/operations.jl b/test/operations.jl
index e14b7702..5970cb0f 100644
--- a/test/operations.jl
+++ b/test/operations.jl
@@ -57,7 +57,7 @@ using ..Models
     @test_throws ArgumentError transform(m, Tuple(y1), Tuple(y2))
 end
 
-@testset "operations on network-composite models" begin
+@testset "operations on NetworkComposite models" begin
     X = MLJBase.table(rand(4, 4))
     y = rand(4)
     m = fit!(machine(SimpleProbabilisticNetworkCompositeModel(), X, y), verbosity=0)
@@ -67,21 +67,6 @@ end
     @test_throws ErrorException transform(m, X)
 end
 
-# Test below to be removed after next breaking release
-@testset "operations on composite/surrogate models" begin
-    X = MLJBase.table(rand(4, 4))
-    y = rand(4)
-    m = fit!(machine(SimpleDeterministicCompositeModel(), X, y), verbosity=0)
-    @test predict(m, X) == m.fitresult.predict(X)
-    @test_throws ErrorException transform(m, X)
-
-    m = fit!(machine(SimpleProbabilisticCompositeModel(), X, y), verbosity=0)
-    predictions = m.fitresult.predict(X)
-    @test predict(m, X) == predictions
-    @test predict_mode(m, X) == mode.(predictions)
-    @test_throws ErrorException transform(m, X)
-end
-
 end
 
 true
diff --git a/test/preliminaries.jl b/test/preliminaries.jl
index b806a840..bffc1f4e 100644
--- a/test/preliminaries.jl
+++ b/test/preliminaries.jl
@@ -12,12 +12,8 @@ using Distributed
 addprocs(; exeflags="--project=$(Base.active_project())")
 
 @info "nprocs() = $(nprocs())"
-@static if VERSION >= v"1.3.0-DEV.573"
-    import .Threads
-    @info "nthreads() = $(Threads.nthreads())"
-else
-    @info "Running julia $(VERSION). Multithreading tests excluded. "
-end
+import .Threads
+@info "nthreads() = $(Threads.nthreads())"
 
 @everywhere begin
     using MLJModelInterface
@@ -27,6 +23,7 @@ end
     using Logging
     using ComputationalResources
     using StableRNGs
+    using StatisticalMeasures
 end
 
 import TypedTables
diff --git a/test/resampling.jl b/test/resampling.jl
index c170039a..27850375 100644
--- a/test/resampling.jl
+++ b/test/resampling.jl
@@ -5,6 +5,9 @@ import ComputationalResources: CPU1, CPUProcesses, CPUThreads
 using .TestUtilities
 using ProgressMeter
 import Tables
+@everywhere import StatisticalMeasures.StatisticalMeasuresBase as API
+using StatisticalMeasures
+import LearnAPI
 
 @everywhere begin
     using .Models
@@ -25,13 +28,18 @@ struct DummyInterval <: Interval end
 dummy_interval=DummyInterval()
 
 dummy_measure_det(yhat, y) = 42
-MLJBase.target_scitype(::typeof(dummy_measure_det)) = Table(MLJBase.Textual)
-MLJBase.prediction_type(::typeof(dummy_measure_det)) = :deterministic
-
-dummy_measure_interval(yhat, y) = [123, 456]
-MLJBase.target_scitype(::typeof(dummy_measure_interval)) =
-    Table(MLJBase.Textual)
-MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval
+API.@trait(
+    typeof(dummy_measure_det),
+    observation_scitype = MLJBase.Textual,
+    kind_of_proxy = LearnAPI.LiteralTarget(),
+)
+
+dummy_measure_interval(yhat, y) = 42
+API.@trait(
+    typeof(dummy_measure_interval),
+    observation_scitype = MLJBase.Textual,
+    kind_of_proxy = LearnAPI.ConfidenceInterval(),
+)
 
 @testset "_actual_operations" begin
     clf = ConstantClassifier()
@@ -49,7 +57,7 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval
                                     1) ==
                                    [predict_mean, predict_mean]
 
-    # handling of a measure with `:unknown` `prediction_type` (eg,
+    # handling of a measure with `nothing` `kind_of_proxy` (eg,
     # custom measure):
     my_mae(yhat, y) = abs.(yhat - y)
     @test(
@@ -71,21 +79,29 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval
            [predict_mode])
     @test MLJBase._actual_operations(nothing, [l2,], rgs, 1) ==
         [predict_mean, ]
-    @test_throws(MLJBase.err_incompatible_prediction_types(clf_det, LogLoss()),
-                 MLJBase._actual_operations(nothing, [LogLoss(),], clf_det, 1))
+    @test_throws(
+        MLJBase.err_incompatible_prediction_types(clf_det, LogLoss()),
+        MLJBase._actual_operations(nothing, [LogLoss(),], clf_det, 1),
+    )
     @test MLJBase._actual_operations(nothing, measures_det, clf_det, 1) ==
         [predict, predict]
 
-    # measure/model differ in prediction type but weird target_scitype:
+    # measure/model differ in prediction type:
     @test_throws(
         MLJBase.err_ambiguous_operation(clf, dummy_measure_det),
-        MLJBase._actual_operations(nothing, [dummy_measure_det, ], clf, 1))
+        MLJBase._actual_operations(nothing, [dummy_measure_det, ], clf, 1),
+    )
 
     # measure has :interval prediction type but model does not (2 cases):
     @test_throws(
         MLJBase.err_ambiguous_operation(clf, dummy_measure_interval),
-        MLJBase._actual_operations(nothing,
-                                   [dummy_measure_interval, ], clf, 1))
+        MLJBase._actual_operations(
+            nothing,
+            [dummy_measure_interval, ],
+            clf,
+            1,
+        ),
+    )
     @test_throws(
         MLJBase.err_ambiguous_operation(clf_det, dummy_measure_interval),
         MLJBase._actual_operations(nothing,
@@ -103,16 +119,6 @@ MLJBase.prediction_type(::typeof(dummy_measure_interval)) = :interval
                                    [LogLoss(), ], dummy_interval, 1))
 end
 
-@testset "_feature_dependencies_exist" begin
-    measures = Any[rms, rsq, log_loss, brier_score]
-    @test !MLJBase._feature_dependencies_exist(measures)
-    my_feature_dependent_loss(ŷ, X, y) =
-        sum(abs.(ŷ - y) .* X.penalty)/sum(X.penalty);
-    MLJBase.is_feature_dependent(::typeof(my_feature_dependent_loss)) = true
-    push!(measures, my_feature_dependent_loss)
-    @test MLJBase._feature_dependencies_exist(measures)
-end
-
 @testset_accelerated "dispatch of resources and progress meter" accel begin
 
     @info "Checking progress bars:"
@@ -175,34 +181,50 @@ end
     y = rand(rng,4)
 
     # model prediction type is Probablistic but measure is Deterministic:
-    @test_throws(ArgumentError,
-                  MLJBase._check_measure(rms, predict, model, y))
+    @test_throws(
+        MLJBase.ERR_MEASURES_PROBABILISTIC(rms, MLJBase.LOG_SUGGESTION2),
+        MLJBase._check_measure(rms, predict, model, y),
+    )
 
     @test MLJBase._check_measure(rms, predict_mean, model, y)
 
     @test MLJBase._check_measure(rms, predict_median, model, y)
 
-    # has `y`  `Finite` elscityp but measure `rms` is for `Continuous`:
+    # `y` has `Finite` elscitype but measure `rms` is for `Continuous`:
     y=categorical(collect("abc"))
-    @test_throws(ArgumentError,
-                 MLJBase._check_measure(rms, predict_median, model, y))
+    @test_throws(
+        MLJBase.ERR_MEASURES_OBSERVATION_SCITYPE(
+            rms,
+            Union{Missing,Infinite},
+            Multiclass{3},
+        ),
+        MLJBase._check_measure(rms, predict_median, model, y),
+    )
     model = ConstantClassifier()
     # model prediction type is Probablistic but measure is Deterministic:
-    @test_throws(ArgumentError,
-                 MLJBase._check_measure(mcr, predict, model, y))
+    @test_throws(
+        MLJBase.ERR_MEASURES_PROBABILISTIC(mcr, MLJBase.LOG_SUGGESTION1),
+        MLJBase._check_measure(mcr, predict, model, y),
+    )
 
     @test MLJBase._check_measure(mcr, predict_mode, model, y)
 
     # `Determistic` model but `Probablistic` measure:
     model = DeterministicConstantClassifier()
-    @test_throws(ArgumentError,
-                 MLJBase._check_measure(cross_entropy, predict, model, y))
+    @test_throws(
+        MLJBase.ERR_MEASURES_DETERMINISTIC(cross_entropy),
+        MLJBase._check_measure(cross_entropy, predict, model, y),
+    )
 
     # measure with wrong target_scitype:
-    @test_throws(ArgumentError,
-                 MLJBase._check_measures([brier_score, rms],
-                                         [predict_mode, predict_mean],
-                                         model, y))
+    @test_throws(
+        MLJBase.ERR_MEASURES_DETERMINISTIC(brier_score),
+        MLJBase._check_measures(
+            [brier_score, rms],
+            [predict_mode, predict_mean],
+            model, y,
+        ),
+    )
 
     model = ConstantClassifier()
     @test MLJBase._check_measures([brier_score, cross_entropy, accuracy],
@@ -211,8 +233,6 @@ end
 end
 
 @testset "check weights" begin
-    @test_throws(MLJBase.ERR_WEIGHTS_REAL,
-                 MLJBase._check_weights([:junk, :junk], 2))
     @test_throws(MLJBase.ERR_WEIGHTS_LENGTH,
                  MLJBase._check_weights([0.5, 0.5], 3))
     @test MLJBase._check_weights([0.5, 0.5], 2)
@@ -227,18 +247,18 @@ end
     @test MLJBase._check_class_weights(w, ['b', 'a'])
 end
 
+@everywhere begin
+    user_rms(yhat, y) = mean((yhat -y).^2) |> sqrt  # custom root-mean-square error
+    # only `kind_of_proxy` is declared; deliberately omitting `consumes_multiple_observations` trait:
+    API.@trait typeof(user_rms) kind_of_proxy=LearnAPI.LiteralTarget()
+end
+
 @testset_accelerated "folds specified" accel begin
     x1 = ones(10)
     x2 = ones(10)
     X  = (x1=x1, x2=x2)
     y  = [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]
 
-    my_rms(yhat, y) = sqrt(mean((yhat -y).^2))
-    my_mae(yhat, y) = abs.(yhat - y)
-    MLJBase.reports_each_observation(::typeof(my_mae)) = true
-    MLJBase.prediction_type(::typeof(my_rms)) = :deterministic
-    MLJBase.prediction_type(::typeof(my_mae)) = :deterministic
-
     resampling = [(3:10, 1:2),
                   ([1, 2, 5, 6, 7, 8, 9, 10], 3:4),
                   ([1, 2, 3, 4, 7, 8, 9, 10], 5:6),
@@ -251,19 +271,27 @@ end
         mach  = machine(model, X, y, cache=cache)
 
         # check detection of incompatible measure (cross_entropy):
-        @test_throws ArgumentError evaluate!(mach, resampling=resampling,
-                                             measure=[cross_entropy, rmslp1],
-                                             verbosity=verb,
-                                             acceleration=accel)
+        @test_throws(
+            MLJBase.err_incompatible_prediction_types(model, cross_entropy),
+            evaluate!(
+                mach,
+                resampling=resampling,
+                measure=[cross_entropy, rmslp1],
+                verbosity=verb,
+                acceleration=accel,
+            ),
+        )
         result = evaluate!(mach, resampling=resampling, verbosity=verb,
-                           measure=[my_rms, my_mae, rmslp1], acceleration=accel)
+                           measure=[user_rms, mae, rmslp1], acceleration=accel)
 
         v = [1/2, 3/4, 1/2, 3/4, 1/2]
 
         @test result.per_fold[1] ≈ v
         @test result.per_fold[2] ≈ v
         @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5))
-        @test ismissing(result.per_observation[1])
+        @test result.per_observation[1] ≈ map(result.per_fold[1]) do μ
+            fill(μ, 2)
+        end
         @test result.per_observation[2][1] ≈ [1/2, 1/2]
         @test result.per_observation[2][2] ≈ [3/4, 3/4]
         @test result.measurement[1] ≈ mean(v)
@@ -276,6 +304,42 @@ end
     end
 end
 
+@testset "folds specified - per_observation=false" begin
+    accel = CPU1()  # fixed resource: this is a plain `@testset`, not `@testset_accelerated`
+    cache = true
+    x1 = ones(10)
+    x2 = ones(10)
+    X  = (x1=x1, x2=x2)
+    y  = [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]
+
+    resampling = [(3:10, 1:2),  # explicit (train, test) row pairs, one per fold
+                  ([1, 2, 5, 6, 7, 8, 9, 10], 3:4),
+                  ([1, 2, 3, 4, 7, 8, 9, 10], 5:6),
+                  ([1, 2, 3, 4, 5, 6, 9, 10], 7:8),
+                  (1:8, 9:10)]
+
+    model = DeterministicConstantRegressor()
+    mach  = machine(model, X, y, cache=cache)
+
+    result = evaluate!(mach, resampling=resampling, verbosity=verb,
+                       measure=[user_rms, mae, rmslp1], acceleration=accel,
+                       per_observation=false)
+
+    v = [1/2, 3/4, 1/2, 3/4, 1/2]  # expected per-fold values for `user_rms` and `mae`
+
+    @test result.per_fold[1] ≈ v
+    @test result.per_fold[2] ≈ v
+    @test result.per_fold[3][1] ≈ abs(log(2) - log(2.5))
+    @test result.per_observation isa Vector{Missing}  # no per-observation values reported
+    @test result.measurement[1] ≈ mean(v)
+    @test result.measurement[2] ≈ mean(v)
+
+    # fitted_params and report per fold (fitresult is the mean of each fold's training targets):
+    @test map(fp->fp.fitresult, result.fitted_params_per_fold) ≈
+        [1.5, 1.25, 1.5, 1.25, 1.5]
+    @test all(isnothing, result.report_per_fold)
+end
+
 @testset "repeated resampling" begin
     x1 = ones(20)
     x2 = ones(20)
@@ -313,10 +377,11 @@ end
     model = Models.DeterministicConstantRegressor()
     for cache in [true, false]
         mach = machine(model, X, y, cache=cache)
+        # to see if a default measure is found:
+        evaluate!(mach, resampling=holdout, verbosity=verb,
+                  acceleration=accel)
         result = evaluate!(mach, resampling=holdout, verbosity=verb,
                            measure=[rms, rmslp1], acceleration=accel)
-        result = evaluate!(mach, resampling=holdout, verbosity=verb,
-                           acceleration=accel)
         @test result.measurement[1] ≈ 2/3
 
         # test direct evaluation of a model + data:
@@ -454,7 +519,7 @@ end
                d for fold in folds])
 end
 
-@testset_accelerated "sample weights in evaluation" accel begin
+@testset_accelerated "weights in evaluation" accel begin
     # cv:
     x1 = ones(4)
     x2 = ones(4)
@@ -483,7 +548,7 @@ end
     X, y = make_blobs(rng=rng)
     cv=CV(nfolds = 2)
     fold1, fold2 = partition(eachindex(y), 0.5)
-    m = MLJBase.MulticlassFScore()
+    m = MulticlassFScore()
     class_w = Dict(1=>1, 2=>2, 3=>3)
 
     model = Models.DeterministicConstantClassifier()
@@ -637,13 +702,6 @@ end
                                measure=misclassification_rate,
                                weights = fill(1, 100), acceleration=accel,
                                verbosity=verb))
-
-        @test_throws(ArgumentError,
-                     evaluate!(mach, resampling=Holdout(fraction_train=0.6),
-                               operation=predict_mode,
-                               measure=misclassification_rate,
-                               weights = fill('a', 5), acceleration=accel,
-                               verbosity=verb))
     end
 
     # resampling on a subset of all rows:
@@ -813,7 +871,7 @@ end
         operation=predict_mode,
         measure=ConfusionMatrix(),
         resampling=CV(),
-    )
+    );
     printed_evaluations = sprint(show, "text/plain", evaluations)
     @test contains(printed_evaluations, "N/A")
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 8b07929e..0c5593af 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,6 +18,7 @@ include("preliminaries.jl")
 @conditional_testset "misc" begin
     @test include("utilities.jl")
     @test include("static.jl")
+    @test include("show.jl")
 end
 
 @conditional_testset "interface" begin
@@ -25,10 +26,8 @@ end
     @test include("interface/data_utils.jl")
 end
 
-@conditional_testset "measures" begin
-    @test include("measures/measures.jl")
-    @test include("measures/measure_search.jl")
-    @test include("measures/doc_strings.jl")
+@conditional_testset "default_measures" begin
+    @test include("default_measures.jl")
 end
 
 @conditional_testset "resampling" begin
@@ -57,14 +56,11 @@ end
     @test include("composition/learning_networks/nodes.jl")
     @test include("composition/learning_networks/inspection.jl")
     @test include("composition/learning_networks/signatures.jl")
-    @test include("composition/learning_networks/deprecated_machines.jl")
     @test include("composition/learning_networks/replace.jl")
 end
 
 @conditional_testset "composition_models" begin
     @test include("composition/models/network_composite.jl")
-    @test include("composition/models/deprecated_methods.jl")
-    @test include("composition/models/deprecated_from_network.jl")
     @test include("composition/models/inspection.jl")
     @test include("composition/models/pipelines.jl")
     @test include("composition/models/transformed_target_model.jl")
diff --git a/test/show.jl b/test/show.jl
new file mode 100644
index 00000000..44aff52c
--- /dev/null
+++ b/test/show.jl
@@ -0,0 +1,14 @@
+using .Models
+
+@testset "display of models" begin
+    io = IOBuffer()  # reused for both checks; `take!` empties it each time
+    show(io, KNNRegressor())  # two-argument `show`: expected to give the one-line form
+    @test String(take!(io)) == "KNNRegressor(K = 5, …)"
+    show(io, MIME("text/plain"), KNNRegressor())  # text/plain `show`: one hyperparameter per line
+    @test String(take!(io)) ==
+        "KNNRegressor(\n  K = 5, \n  algorithm = :kdtree, \n  "*
+        "metric = Distances.Euclidean(0.0), \n  leafsize = 10, \n  "*
+        "reorder = true, \n  weights = :uniform)"
+end
+
+true
diff --git a/test/utilities.jl b/test/utilities.jl
index f9e40580..5356ce66 100644
--- a/test/utilities.jl
+++ b/test/utilities.jl
@@ -171,5 +171,50 @@ end
         "sin, cos, tan, ..."
 end
 
+@testset "observation" begin
+    @test MLJBase.observation(AbstractVector{Count}) ==
+        Count
+    @test MLJBase.observation(AbstractVector{<:Count}) ==
+        Count
+    @test MLJBase.observation(AbstractVector{<:Union{Missing,Count}}) ==
+        Union{Missing,Count}
+    @test MLJBase.observation(AbstractMatrix{<:Count}) ==
+        AbstractVector{<:Count}
+    @test MLJBase.observation(AbstractMatrix{Union{Missing,Count}}) ==
+        AbstractVector{Union{Missing,Count}}
+    @test MLJBase.observation(AbstractMatrix{<:Union{Missing,Count}}) ==
+        AbstractVector{<:Union{Missing,Count}}
+    @test MLJBase.observation(Table(Count)) == AbstractVector{<:Count}
+end
+
+@testset "guess_observation_scitype" begin
+    @test MLJBase.guess_observation_scitype([missing, 1, 2, 3]) ==
+        Union{Missing, Count}
+    @test MLJBase.guess_observation_scitype(rand(3, 2)) ==
+        AbstractVector{Continuous}
+    @test MLJBase.guess_observation_scitype((x=rand(3), y=rand(Bool, 3))) ==
+        AbstractVector{Union{Continuous, Count}}
+    @test MLJBase.guess_observation_scitype((x=[missing, 1, 2], y=[1, 2, 3])) ==
+        Unknown
+    @test MLJBase.guess_observation_scitype(5) == Unknown
+end
+
+mutable struct DRegressor2 <: Deterministic end
+MLJBase.target_scitype(::Type{<:DRegressor2}) =
+    AbstractVector{<:Continuous}
+
+@test MLJBase.guess_model_target_observation_scitype(DRegressor2()) == Continuous
+
+@testset "pretty" begin
+    X = (x=fill(1, 3), y=fill(2, 3))  # named tuple with two 3-element columns
+    io = IOBuffer()
+    pretty(X)  # method without `io`: only checked not to throw
+    pretty(io, X)  # output captured in `io` for the checks below
+    str = take!(io) |> String
+    @test contains(str, "x")  # column names appear in the rendered output
+    @test contains(str, "y")
+    @test contains(str, "│")  # box-drawing vertical bar, i.e. a drawn table border
+end
+
 end # module
 true