From 57c172af2a592e31242037b8de4cd3e734600344 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Fri, 25 Jan 2019 15:34:18 +1100 Subject: [PATCH 01/13] making kmeans take AbstractMatrix instead of Matrix, see #136 and #129 --- src/kmeans.jl | 48 ++++++++++++++++++++++++---------------------- src/seeding.jl | 41 +++++++++++++++++++-------------------- test/kmeans.jl | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ test/seeding.jl | 51 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 145 insertions(+), 45 deletions(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index b3ef9224..660ca549 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -18,9 +18,8 @@ const _kmeans_default_maxiter = 100 const _kmeans_default_tol = 1.0e-6 const _kmeans_default_display = :none -function kmeans!(X::Matrix{T}, centers::Matrix{T}; - weights=nothing, - maxiter::Integer=_kmeans_default_maxiter, +function kmeans!(X::AbstractMatrix{T}, centers::AbstractMatrix{T}; + weights=nothing, maxiter::Integer=_kmeans_default_maxiter, tol::Real=_kmeans_default_tol, display::Symbol=_kmeans_default_display, distance::SemiMetric=SqEuclidean()) where T<:AbstractFloat @@ -40,7 +39,7 @@ function kmeans!(X::Matrix{T}, centers::Matrix{T}; round(Int, maxiter), tol, display_level(display), distance) end -function kmeans(X::Matrix{T}, k::Int; +function kmeans(X::AbstractMatrix{T}, k::Int; weights=nothing, init=_kmeans_default_init, maxiter::Integer=_kmeans_default_maxiter, @@ -64,17 +63,18 @@ end # core k-means skeleton function _kmeans!( - x::Matrix{T}, # in: sample matrix (d x n) - w::Union{Nothing, Vector{T}}, # in: sample weights (n) - centers::Matrix{T}, # in/out: matrix of centers (d x k) - assignments::Vector{Int}, # out: vector of assignments (n) - costs::Vector{T}, # out: costs of the resultant assignments (n) - counts::Vector{Int}, # out: the number of samples assigned to each cluster (k) - cweights::Vector{Float64}, # out: the weights of each cluster - maxiter::Int, # in: maximum number of iterations - tol::Real, # in: tolerance of change at convergence - displevel::Int, # in: the level of display - distance::SemiMetric) where T<:AbstractFloat # in: function to calculate the distance with + x::AbstractMatrix{T}, # in: sample matrix (d x n) + w::Union{Nothing, Vector{T}}, # in: sample weights (n) + centers::AbstractMatrix{T}, # in/out: matrix of centers (d x k) + assignments::Vector{Int}, # out: vector of assignments (n) + costs::Vector{T}, # out: costs of the resultant assignments (n) + counts::Vector{Int}, # out: the number of samples assigned to each cluster (k) + cweights::Vector{Float64}, # out: the weights of each cluster + maxiter::Int, # in: maximum number of iterations + tol::Real, # in: tolerance of change at convergence + displevel::Int, # in: the level of display + distance::SemiMetric # in: function to calculate the distance with + ) where T<:AbstractFloat # initialize @@ -157,6 +157,8 @@ function _kmeans!( end end + centers isa Matrix || (centers = copy(centers)) + return KmeansResult(centers, assignments, costs, counts, cweights, Float64(objv), t, converged) end @@ -238,11 +240,11 @@ end # (specific to the case where samples are not weighted) # function update_centers!( - x::Matrix{T}, # in: sample matrix (d x n) - w::Nothing, # in: sample weights + x::AbstractMatrix{T}, # in: sample matrix (d x n) + w::Nothing, # in: sample weights assignments::Vector{Int}, # in: assignments (n) to_update::Vector{Bool}, # in: whether a center needs update (k) - centers::Matrix{T}, # out: updated centers (d x k) + centers::AbstractMatrix{T}, # out: updated centers (d x k) cweights::Vector) where T<:AbstractFloat # out: updated cluster weights (k) d::Int = size(x, 1) @@ -292,11 +294,11 @@ end # (specific to the case where samples are weighted) # function update_centers!( - x::Matrix{T}, # in: sample matrix (d x n) + x::AbstractMatrix{T}, # in: sample matrix (d x n) weights::Vector{T}, # in: sample weights (n) assignments::Vector{Int}, # in: assignments (n) to_update::Vector{Bool}, # in: whether a center needs update (k) - centers::Matrix{T}, # out: updated centers (d x k) + centers::AbstractMatrix{T}, # out: updated centers (d x k) cweights::Vector # out: updated cluster weights (k) ) where T<:AbstractFloat @@ -342,9 +344,9 @@ end # Re-picks centers that get no samples assigned to them. # function repick_unused_centers( - x::Matrix{T}, # in: the sample set (d x n) - costs::Vector{T}, # in: the current assignment costs (n) - centers::Matrix{T}, # to be updated: the centers (d x k) + x::AbstractMatrix{T}, # in: the sample set (d x n) + costs::Vector{T}, # in: the current assignment costs (n) + centers::AbstractMatrix{T}, # to be updated: the centers (d x k) unused::Vector{Int}) where T<:AbstractFloat # in: the set of indices of centers to be updated # pick new centers using a scheme like kmeans++ diff --git a/src/seeding.jl b/src/seeding.jl index d08a5969..f5dfa321 100644 --- a/src/seeding.jl +++ b/src/seeding.jl @@ -19,10 +19,10 @@ abstract type SeedingAlgorithm end -initseeds(alg::SeedingAlgorithm, X::RealMatrix, k::Integer) = +initseeds(alg::SeedingAlgorithm, X::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = initseeds!(Vector{Int}(undef, k), alg, X) -initseeds_by_costs(alg::SeedingAlgorithm, costs::RealMatrix, k::Integer) = +initseeds_by_costs(alg::SeedingAlgorithm, costs::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = initseeds_by_costs!(Vector{Int}(undef, k), alg, costs) seeding_algorithm(s::Symbol) = @@ -31,16 +31,16 @@ seeding_algorithm(s::Symbol) = s == :kmcen ? KmCentralityAlg() : error("Unknown seeding algorithm $s") -initseeds(algname::Symbol, X::RealMatrix, k::Integer) = +initseeds(algname::Symbol, X::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = initseeds(seeding_algorithm(algname), X, k)::Vector{Int} -initseeds_by_costs(algname::Symbol, costs::RealMatrix, k::Integer) = +initseeds_by_costs(algname::Symbol, costs::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = initseeds_by_costs(seeding_algorithm(algname), costs, k) -initseeds(iseeds::Vector{Int}, X::RealMatrix, k::Integer) = iseeds -initseeds_by_costs(iseeds::Vector{Int}, costs::RealMatrix, k::Integer) = iseeds +initseeds(iseeds::Vector{Int}, X::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = iseeds +initseeds_by_costs(iseeds::Vector{Int}, costs::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = iseeds -function copyseeds!(S::DenseMatrix, X::DenseMatrix, iseeds::AbstractVector) +function copyseeds!(S::DenseMatrix, X::AbstractMatrix{T}, iseeds::AbstractVector) where T<:AbstractFloat d = size(X, 1) n = size(X, 2) k = length(iseeds) @@ -53,7 +53,7 @@ function copyseeds!(S::DenseMatrix, X::DenseMatrix, iseeds::AbstractVector) return S end -copyseeds(X::DenseMatrix{T}, iseeds::AbstractVector) where {T} = +copyseeds(X::AbstractMatrix{T}, iseeds::AbstractVector) where {T} = copyseeds!(Matrix{T}(undef, size(X,1), length(iseeds)), X, iseeds) function check_seeding_args(n::Integer, k::Integer) @@ -69,11 +69,9 @@ end mutable struct RandSeedAlg <: SeedingAlgorithm end -initseeds!(iseeds::IntegerVector, alg::RandSeedAlg, X::RealMatrix) = - sample!(1:size(X,2), iseeds; replace=false) +initseeds!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = sample!(1:size(X,2), iseeds; replace=false) -initseeds_by_costs!(iseeds::IntegerVector, alg::RandSeedAlg, X::RealMatrix) = - sample!(1:size(X,2), iseeds; replace=false) +initseeds_by_costs!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = sample!(1:size(X,2), iseeds; replace=false) # Kmeans++ seeding @@ -85,7 +83,7 @@ initseeds_by_costs!(iseeds::IntegerVector, alg::RandSeedAlg, X::RealMatrix) = mutable struct KmppAlg <: SeedingAlgorithm end -function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::RealMatrix, metric::PreMetric) +function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}, metric::PreMetric) where T <: AbstractFloat n = size(X, 2) k = length(iseeds) check_seeding_args(n, k) @@ -115,10 +113,9 @@ function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::RealMatrix, metric:: return iseeds end -initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::RealMatrix) = - initseeds!(iseeds, alg, X, SqEuclidean()) +initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = initseeds!(iseeds, alg, X, SqEuclidean()) -function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::RealMatrix) +function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::AbstractMatrix{T}) where T<:AbstractFloat n = size(costs, 1) k = length(iseeds) check_seeding_args(n, k) @@ -145,8 +142,10 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::RealMat return iseeds end -kmpp(X::RealMatrix, k::Int) = initseeds(KmppAlg(), X, k) -kmpp_by_costs(costs::RealMatrix, k::Int) = initseeds(KmppAlg(), costs, k) +kmpp(X::AbstractMatrix{T}, k::Int) where T<:AbstractFloat = + initseeds(KmppAlg(), X, k) +kmpp_by_costs(costs::AbstractMatrix{T}, k::Int) where T<:AbstractFloat = + initseeds(KmppAlg(), costs, k) # K-medoids initialization based on centrality @@ -158,7 +157,7 @@ kmpp_by_costs(costs::RealMatrix, k::Int) = initseeds(KmppAlg(), costs, k) mutable struct KmCentralityAlg <: SeedingAlgorithm end -function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg, costs::RealMatrix) +function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg, costs::AbstractMatrix{T}) where T<:AbstractFloat n = size(costs, 1) k = length(iseeds) k <= n || error("Attempted to select more seeds than samples.") @@ -183,8 +182,8 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg, costs: return iseeds end -initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::RealMatrix, metric::PreMetric) = +initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::AbstractMatrix{T}, metric::PreMetric) where T<:AbstractFloat = initseeds_by_costs!(iseeds, alg, pairwise(metric, X)) -initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::RealMatrix) = +initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = initseeds!(iseeds, alg, X, SqEuclidean()) diff --git a/test/kmeans.jl b/test/kmeans.jl index 63c34f3a..23af7bb6 100644 --- a/test/kmeans.jl +++ b/test/kmeans.jl @@ -28,6 +28,7 @@ n = 1000 k = 10 x = rand(m, n) +xt = copy(transpose(x)) @testset "non-weighted" begin r = kmeans(x, k; maxiter=50) @@ -42,6 +43,19 @@ x = rand(m, n) @test sum(r.costs) ≈ r.totalcost end +@testset "non-weighted^T" begin + r = kmeans(xt', k; maxiter=50) + @test isa(r, KmeansResult{Float64}) + @test size(r.centers) == (m, k) + @test length(r.assignments) == n + @test all(a -> 1 <= a <= k, r.assignments) + @test length(r.costs) == n + @test length(r.counts) == k + @test sum(r.counts) == n + @test r.cweights == map(Float64, r.counts) + @test sum(r.costs) ≈ r.totalcost +end + @testset "non-weighted (float32)" begin r = kmeans(map(Float32, x), k; maxiter=50) @test isa(r, KmeansResult{Float32}) @@ -74,6 +88,25 @@ end @test dot(r.costs, w) ≈ r.totalcost end +@testset "weighted^T" begin + w = rand(n) + r = kmeans(xt', k; maxiter=50, weights=w) + @test isa(r, KmeansResult{Float64}) + @test size(r.centers) == (m, k) + @test length(r.assignments) == n + @test all(a -> 1 <= a <= k, r.assignments) + @test length(r.costs) == n + @test length(r.counts) == k + @test sum(r.counts) == n + + cw = zeros(k) + for i = 1:n + cw[r.assignments[i]] += w[i] + end + @test r.cweights ≈ cw + @test dot(r.costs, w) ≈ r.totalcost +end + @testset "custom distance" begin r = kmeans(x, k; maxiter=50, init=:kmcen, distance=MySqEuclidean()) r2 = kmeans(x, k; maxiter=50, init=:kmcen) @@ -91,4 +124,21 @@ end end end +@testset "custom distance^T" begin + r = kmeans(xt', k; maxiter=50, init=:kmcen, distance=MySqEuclidean()) + r2 = kmeans(xt', k; maxiter=50, init=:kmcen) + @test isa(r, KmeansResult{Float64}) + @test size(r.centers) == (m, k) + @test length(r.assignments) == n + @test all(a -> 1 <= a <= k, r.assignments) + @test length(r.costs) == n + @test length(r.counts) == k + @test sum(r.counts) == n + @test r.cweights == map(Float64, r.counts) + @test sum(r.costs) ≈ r.totalcost + for fn in fieldnames(typeof(r)) + @test getfield(r, fn) == getfield(r2, fn) + end +end + end diff --git a/test/seeding.jl b/test/seeding.jl index 6ea1fef2..6eaf9ec6 100644 --- a/test/seeding.jl +++ b/test/seeding.jl @@ -12,7 +12,7 @@ Random.seed!(34568) alldistinct(x::Vector{Int}) = (length(Set(x)) == length(x)) -function min_interdist(X::Matrix) +function min_interdist(X::AbstractMatrix) dists = pairwise(SqEuclidean(), X) n = size(X, 2) r = Inf @@ -31,6 +31,9 @@ k = 5 X = rand(d, n) C = pairwise(SqEuclidean(), X) +Xt = copy(transpose(X)) +Ct = copy(transpose(C)) + md0 = min_interdist(X) @testset "RandSeed" begin @@ -47,6 +50,20 @@ md0 = min_interdist(X) @test R == X[:, iseeds] end +@testset "RandSeed^T" begin + iseeds = initseeds(RandSeedAlg(), Xt', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + iseeds = initseeds_by_costs(RandSeedAlg(), Ct', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + R = copyseeds(Xt', iseeds) + @test isa(R, Matrix{Float64}) + @test R == (Xt')[:, iseeds] +end + @testset "Kmpp" begin iseeds = initseeds(KmppAlg(), X, k) @test length(iseeds) == k @@ -67,6 +84,26 @@ end @test alldistinct(iseeds) end +@testset "Kmpp^T" begin + iseeds = initseeds(KmppAlg(), Xt', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + iseeds = initseeds_by_costs(KmppAlg(), Ct', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + @test min_interdist((Xt')[:, iseeds]) > 20 * md0 + + iseeds = kmpp(Xt', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + iseeds = kmpp_by_costs(Ct', k) + @test length(iseeds) == k + @test alldistinct(iseeds) +end + @testset "Kmcentrality" begin iseeds = initseeds(KmCentralityAlg(), X, k) @test length(iseeds) == k @@ -79,4 +116,16 @@ end @test min_interdist(X[:, iseeds]) > 2 * md0 end +@testset "Kmcentrality^T" begin + iseeds = initseeds(KmCentralityAlg(), Xt', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + iseeds = initseeds_by_costs(KmCentralityAlg(), Ct', k) + @test length(iseeds) == k + @test alldistinct(iseeds) + + @test min_interdist((Xt')[:, iseeds]) > 2 * md0 +end + end From 4c7f218a1895304b23cd830d20fabfeca12e9e7b Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Sat, 26 Jan 2019 15:47:13 +1100 Subject: [PATCH 02/13] PR update based on feedback + cosmetics --- src/kmeans.jl | 181 ++++++++++++++++++++++-------------------------- src/seeding.jl | 58 +++++++++------- test/kmeans.jl | 75 +++++++------------- test/seeding.jl | 80 ++++++++++----------- 4 files changed, 176 insertions(+), 218 deletions(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 660ca549..1c7ab196 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -5,7 +5,7 @@ mutable struct KmeansResult{T<:AbstractFloat} <: ClusteringResult centers::Matrix{T} # cluster centers (d x k) assignments::Vector{Int} # assignments (n) - costs::Vector{T} # costs of the resultant assignments (n) + costs::Vector{Float64} # costs of the resultant assignments (n) counts::Vector{Int} # number of samples assigned to each cluster (k) cweights::Vector{Float64} # cluster weights (k) totalcost::Float64 # total cost (i.e. objective) (k) @@ -18,34 +18,33 @@ const _kmeans_default_maxiter = 100 const _kmeans_default_tol = 1.0e-6 const _kmeans_default_display = :none -function kmeans!(X::AbstractMatrix{T}, centers::AbstractMatrix{T}; +function kmeans!(X::AbstractMatrix{<:Real}, centers::AbstractMatrix{<:Real}; weights=nothing, maxiter::Integer=_kmeans_default_maxiter, tol::Real=_kmeans_default_tol, display::Symbol=_kmeans_default_display, - distance::SemiMetric=SqEuclidean()) where T<:AbstractFloat + distance::SemiMetric=SqEuclidean()) m, n = size(X) m2, k = size(centers) m == m2 || throw(DimensionMismatch("Inconsistent array dimensions.")) (2 <= k < n) || error("k must have 2 <= k < n.") - assignments = zeros(Int, n) - costs = zeros(T, n) + assignments = Vector{Int}(undef, n) + costs = Vector{Float64}(undef, n) counts = Vector{Int}(undef, k) cweights = Vector{Float64}(undef, k) - _kmeans!(X, conv_weights(T, n, weights), centers, - assignments, costs, counts, cweights, - round(Int, maxiter), tol, display_level(display), distance) + _kmeans!(X, conv_weights(Float64, n, weights), centers, assignments, + costs, counts, cweights, maxiter, tol, + display_level(display), distance) end -function kmeans(X::AbstractMatrix{T}, k::Int; - weights=nothing, - init=_kmeans_default_init, +function kmeans(X::AbstractMatrix{<:Real}, k::Int; + weights=nothing, init=_kmeans_default_init, maxiter::Integer=_kmeans_default_maxiter, tol::Real=_kmeans_default_tol, display::Symbol=_kmeans_default_display, - distance::SemiMetric=SqEuclidean()) where T<:AbstractFloat + distance::SemiMetric=SqEuclidean()) m, n = size(X) (2 <= k < n) || error("k must have 2 <= k < n.") @@ -63,30 +62,30 @@ end # core k-means skeleton function _kmeans!( - x::AbstractMatrix{T}, # in: sample matrix (d x n) - w::Union{Nothing, Vector{T}}, # in: sample weights (n) - centers::AbstractMatrix{T}, # in/out: matrix of centers (d x k) + X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + w::Union{Nothing, AbstractVector{<:Real}}, # in: sample weights (n) + centers::AbstractMatrix{<:Real}, # in/out: matrix of centers (d x k) assignments::Vector{Int}, # out: vector of assignments (n) - costs::Vector{T}, # out: costs of the resultant assignments (n) - counts::Vector{Int}, # out: the number of samples assigned to each cluster (k) - cweights::Vector{Float64}, # out: the weights of each cluster - maxiter::Int, # in: maximum number of iterations + costs::Vector{Float64}, # out: costs of the resultant assignments (n) + counts::Vector{Int}, # out: # samples assigned to each cluster (k) + cweights::Vector{Float64}, # out: weights of each cluster + maxiter::Integer, # in: maximum number of iterations tol::Real, # in: tolerance of change at convergence displevel::Int, # in: the level of display distance::SemiMetric # in: function to calculate the distance with - ) where T<:AbstractFloat + ) # initialize - k = size(centers, 2) to_update = Vector{Bool}(undef, k) # indicators of whether a center needs to be updated - unused = Int[] + unused = Vector{Int}() num_affected::Int = k # number of centers, to which the distances need to be recomputed - dmat = pairwise(distance, centers, x) - dmat = convert(Array{T}, dmat) #Can be removed if one day Distance.result_type(SqEuclidean(), T, T) == T - update_assignments!(dmat, true, assignments, costs, counts, to_update, unused) - objv = w === nothing ? sum(costs) : dot(w, costs) + dmat = pairwise(distance, centers, X) + update_assignments!(dmat, true, assignments, costs, counts, + to_update, unused) + + objv = (w === nothing) ? sum(costs) : dot(w, costs) # main loop t = 0 @@ -101,37 +100,36 @@ function _kmeans!( t = t + 1 # update (affected) centers - - update_centers!(x, w, assignments, to_update, centers, cweights) + update_centers!(X, w, assignments, to_update, centers, cweights) if !isempty(unused) - repick_unused_centers(x, costs, centers, unused) + repick_unused_centers(X, costs, centers, unused) end # update pairwise distance matrix - if !isempty(unused) to_update[unused] .= true end if t == 1 || num_affected > 0.75 * k - pairwise!(dmat, distance, centers, x) + pairwise!(dmat, distance, centers, X) else # if only a small subset is affected, only compute for that subset affected_inds = findall(to_update) - dmat_p = pairwise(distance, centers[:, affected_inds], x) + dmat_p = pairwise(distance, centers[:, affected_inds], X) dmat[affected_inds, :] .= dmat_p end # update assignments - update_assignments!(dmat, false, assignments, costs, counts, to_update, unused) + update_assignments!(dmat, false, assignments, costs, counts, + to_update, unused) + num_affected = sum(to_update) + length(unused) # compute change of objective and determine convergence - prev_objv = objv - objv = w === nothing ? sum(costs) : dot(w, costs) + objv = (w === nothing) ? sum(costs) : dot(w, costs) objv_change = objv - prev_objv if objv_change > tol @@ -157,10 +155,8 @@ function _kmeans!( end end - centers isa Matrix || (centers = copy(centers)) - - return KmeansResult(centers, assignments, costs, counts, cweights, - Float64(objv), t, converged) + return KmeansResult(convert(Matrix, centers), assignments, costs, counts, + cweights, Float64(objv), t, converged) end @@ -169,15 +165,15 @@ end # an updated (squared) distance matrix # function update_assignments!( - dmat::Matrix{T}, # in: distance matrix (k x n) - is_init::Bool, # in: whether it is the initial run - assignments::Vector{Int}, # out: assignment vector (n) - costs::Vector{T}, # out: costs of the resultant assignment (n) - counts::Vector{Int}, # out: number of samples assigned to each cluster (k) - to_update::Vector{Bool}, # out: whether a center needs update (k) - unused::Vector{Int}) where T<:AbstractFloat # out: the list of centers get no samples assigned to it + dmat::Matrix{<:Real}, # in: distance matrix (k x n) + is_init::Bool, # in: whether it is the initial run + assignments::Vector{Int}, # out: assignment vector (n) + costs::Vector{Float64}, # out: costs of the resultant assignment (n) + counts::Vector{Int}, # out: # samples assigned to each cluster (k) + to_update::Vector{Bool}, # out: whether a center needs update (k) + unused::Vector{Int}) # out: list of centers with no samples - k::Int, n::Int = size(dmat) + k, n = size(dmat) # re-initialize the counting vector fill!(counts, 0) @@ -192,12 +188,12 @@ function update_assignments!( end # process each sample - @inbounds for j = 1 : n + @inbounds for j = 1:n # find the closest cluster to the i-th sample - a::Int = 1 - c::T = dmat[1, j] - for i = 2 : k + a = 1 + c = dmat[1, j] + for i = 2:k ci = dmat[i, j] if ci < c a = i @@ -226,7 +222,7 @@ function update_assignments!( # look for centers that have no associated samples - for i = 1 : k + for i = 1:k if counts[i] == 0 push!(unused, i) to_update[i] = false # this is handled using different mechanism @@ -240,36 +236,31 @@ end # (specific to the case where samples are not weighted) # function update_centers!( - x::AbstractMatrix{T}, # in: sample matrix (d x n) - w::Nothing, # in: sample weights - assignments::Vector{Int}, # in: assignments (n) - to_update::Vector{Bool}, # in: whether a center needs update (k) - centers::AbstractMatrix{T}, # out: updated centers (d x k) - cweights::Vector) where T<:AbstractFloat # out: updated cluster weights (k) + X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + w::Nothing, # in: sample weights + assignments::Vector{Int}, # in: assignments (n) + to_update::Vector{Bool}, # in: whether a center needs update (k) + centers::AbstractMatrix{<:Real}, # out: updated centers (d x k) + cweights::Vector{Float64}) # out: updated cluster weights (k) - d::Int = size(x, 1) - n::Int = size(x, 2) - k::Int = size(centers, 2) + (d, n), k = size(X), size(centers, 2) # initialize center weights - for i = 1 : k - if to_update[i] - cweights[i] = 0. - end - end + cweights[to_update] .= 0.0 # accumulate columns - @inbounds for j = 1 : n + @inbounds for j = 1:n cj = assignments[j] 1 <= cj <= k || error("assignment out of boundary.") + if to_update[cj] if cweights[cj] > 0 for i = 1:d - centers[i, cj] += x[i, j] + centers[i, cj] += X[i, j] end else for i = 1:d - centers[i, cj] = x[i, j] + centers[i, cj] = X[i, j] end end cweights[cj] += 1 @@ -277,12 +268,12 @@ function update_centers!( end # sum ==> mean - for j = 1:k + @inbounds for j = 1:k if to_update[j] - @inbounds cj::T = 1 / cweights[j] - vj = view(centers,:,j) + cj = 1.0 / cweights[j] + vj = view(centers, :, j) for i = 1:d - @inbounds vj[i] *= cj + vj[i] *= cj end end end @@ -294,37 +285,33 @@ end # (specific to the case where samples are weighted) # function update_centers!( - x::AbstractMatrix{T}, # in: sample matrix (d x n) - weights::Vector{T}, # in: sample weights (n) - assignments::Vector{Int}, # in: assignments (n) - to_update::Vector{Bool}, # in: whether a center needs update (k) - centers::AbstractMatrix{T}, # out: updated centers (d x k) - cweights::Vector # out: updated cluster weights (k) -) where T<:AbstractFloat - - d::Int = size(x, 1) - n::Int = size(x, 2) - k::Int = size(centers, 2) + X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + weights::AbstractVector{<:Real}, # in: sample weights (n) + assignments::Vector{Int}, # in: assignments (n) + to_update::Vector{Bool}, # in: whether a center needs update (k) + centers::AbstractMatrix{<:Real}, # out: updated centers (d x k) + cweights::Vector{Float64}) # out: updated cluster weights (k) + + (d, n), k = size(X), size(centers, 2) # initialize center weights cweights[to_update] .= 0.0 # accumulate columns # accumulate_cols_u!(centers, cweights, x, assignments, weights, to_update) - for j = 1 : n + for j = 1:n @inbounds wj = weights[j] - if wj > 0 @inbounds cj = assignments[j] 1 <= cj <= k || error("assignment out of boundary.") if to_update[cj] rj = view(centers, :, cj) - xj = view(x, :, j) + xj = view(X, :, j) if cweights[cj] > 0 @inbounds rj .+= xj * wj else - @inbounds rj .= xj * wj + rj .= xj * wj end cweights[cj] += wj end @@ -334,7 +321,7 @@ function update_centers!( # sum ==> mean for j = 1:k if to_update[j] - @inbounds centers[:, j] .*= 1 / cweights[j] + @inbounds centers[:, j] .*= 1.0 / cweights[j] end end end @@ -344,23 +331,23 @@ end # Re-picks centers that get no samples assigned to them. # function repick_unused_centers( - x::AbstractMatrix{T}, # in: the sample set (d x n) - costs::Vector{T}, # in: the current assignment costs (n) - centers::AbstractMatrix{T}, # to be updated: the centers (d x k) - unused::Vector{Int}) where T<:AbstractFloat # in: the set of indices of centers to be updated + X::AbstractMatrix{<:Real}, # in: the sample set (d x n) + costs::Vector{Float64}, # in: the current assignment costs (n) + centers::AbstractMatrix{<:Real}, # to be updated: the centers (d x k) + unused::Vector{Int}) # in: set of indices of centers to be updated # pick new centers using a scheme like kmeans++ ds = similar(costs) tcosts = copy(costs) - n = size(x, 2) + n = size(X, 2) for i in unused j = wsample(1:n, tcosts) tcosts[j] = 0 - v = x[:,j] - centers[:,i] = v + v = X[:, j] + centers[:, i] = v - colwise!(ds, SqEuclidean(), v, x) + colwise!(ds, SqEuclidean(), v, X) tcosts = min(tcosts, ds) end end diff --git a/src/seeding.jl b/src/seeding.jl index f5dfa321..38334740 100644 --- a/src/seeding.jl +++ b/src/seeding.jl @@ -19,10 +19,10 @@ abstract type SeedingAlgorithm end -initseeds(alg::SeedingAlgorithm, X::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = +initseeds(alg::SeedingAlgorithm, X::AbstractMatrix{<:Real}, k::Integer) = initseeds!(Vector{Int}(undef, k), alg, X) -initseeds_by_costs(alg::SeedingAlgorithm, costs::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = +initseeds_by_costs(alg::SeedingAlgorithm, costs::AbstractMatrix{<:Real}, k::Integer) = initseeds_by_costs!(Vector{Int}(undef, k), alg, costs) seeding_algorithm(s::Symbol) = @@ -31,16 +31,17 @@ seeding_algorithm(s::Symbol) = s == :kmcen ? KmCentralityAlg() : error("Unknown seeding algorithm $s") -initseeds(algname::Symbol, X::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = +initseeds(algname::Symbol, X::AbstractMatrix{<:Real}, k::Integer) = initseeds(seeding_algorithm(algname), X, k)::Vector{Int} -initseeds_by_costs(algname::Symbol, costs::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = +initseeds_by_costs(algname::Symbol, costs::AbstractMatrix{<:Real}, k::Integer) = initseeds_by_costs(seeding_algorithm(algname), costs, k) -initseeds(iseeds::Vector{Int}, X::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = iseeds -initseeds_by_costs(iseeds::Vector{Int}, costs::AbstractMatrix{T}, k::Integer) where T<:AbstractFloat = iseeds +initseeds(iseeds::Vector{Int}, X::AbstractMatrix{<:Real}, k::Integer) = iseeds +initseeds_by_costs(iseeds::Vector{Int}, costs::AbstractMatrix{<:Real}, k::Integer) = iseeds -function copyseeds!(S::DenseMatrix, X::AbstractMatrix{T}, iseeds::AbstractVector) where T<:AbstractFloat +function copyseeds!(S::DenseMatrix, X::AbstractMatrix{<:Real}, + iseeds::AbstractVector) d = size(X, 1) n = size(X, 2) k = length(iseeds) @@ -53,8 +54,8 @@ function copyseeds!(S::DenseMatrix, X::AbstractMatrix{T}, iseeds::AbstractVector return S end -copyseeds(X::AbstractMatrix{T}, iseeds::AbstractVector) where {T} = - copyseeds!(Matrix{T}(undef, size(X,1), length(iseeds)), X, iseeds) +copyseeds(X::AbstractMatrix{<:Real}, iseeds::AbstractVector) = + copyseeds!(Matrix{eltype(X)}(undef, size(X,1), length(iseeds)), X, iseeds) function check_seeding_args(n::Integer, k::Integer) k >= 1 || error("The number of seeds must be positive.") @@ -69,9 +70,9 @@ end mutable struct RandSeedAlg <: SeedingAlgorithm end -initseeds!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = sample!(1:size(X,2), iseeds; replace=false) +initseeds!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{<:Real}) = sample!(1:size(X, 2), iseeds; replace=false) -initseeds_by_costs!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = sample!(1:size(X,2), iseeds; replace=false) +initseeds_by_costs!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{<:Real}) = sample!(1:size(X,2), iseeds; replace=false) # Kmeans++ seeding @@ -83,7 +84,9 @@ initseeds_by_costs!(iseeds::IntegerVector, alg::RandSeedAlg, X::AbstractMatrix{T mutable struct KmppAlg <: SeedingAlgorithm end -function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}, metric::PreMetric) where T <: AbstractFloat +function initseeds!(iseeds::IntegerVector, alg::KmppAlg, + X::AbstractMatrix{<:Real}, metric::PreMetric) + n = size(X, 2) k = length(iseeds) check_seeding_args(n, k) @@ -93,7 +96,7 @@ function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}, m iseeds[1] = p if k > 1 - mincosts = colwise(metric, X, view(X,:,p)) + mincosts = colwise(metric, X, view(X, :, p)) mincosts[p] = 0 # pick remaining (with a chance proportional to mincosts) @@ -103,8 +106,8 @@ function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}, m iseeds[j] = p # update mincosts - c = view(X,:,p) - colwise!(tmpcosts, metric, X, view(X,:,p)) + c = view(X, :, p) + colwise!(tmpcosts, metric, X, view(X, :, p)) updatemin!(mincosts, tmpcosts) mincosts[p] = 0 end @@ -113,9 +116,12 @@ function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}, m return iseeds end -initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = initseeds!(iseeds, alg, X, SqEuclidean()) +initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{<:Real}) = + initseeds!(iseeds, alg, X, SqEuclidean()) + +function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, + costs::AbstractMatrix{<:Real}) -function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::AbstractMatrix{T}) where T<:AbstractFloat n = size(costs, 1) k = length(iseeds) check_seeding_args(n, k) @@ -125,7 +131,7 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::Abstrac iseeds[1] = p if k > 1 - mincosts = copy(view(costs,:,p)) + mincosts = copy(view(costs, :, p)) mincosts[p] = 0 # pick remaining (with a chance proportional to mincosts) @@ -134,7 +140,7 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::Abstrac iseeds[j] = p # update mincosts - updatemin!(mincosts, view(costs,:,p)) + updatemin!(mincosts, view(costs, :, p)) mincosts[p] = 0 end end @@ -142,10 +148,8 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::Abstrac return iseeds end -kmpp(X::AbstractMatrix{T}, k::Int) where T<:AbstractFloat = - initseeds(KmppAlg(), X, k) -kmpp_by_costs(costs::AbstractMatrix{T}, k::Int) where T<:AbstractFloat = - initseeds(KmppAlg(), costs, k) +kmpp(X::AbstractMatrix{<:Real}, k::Int) = initseeds(KmppAlg(), X, k) +kmpp_by_costs(costs::AbstractMatrix{<:Real}, k::Int) = initseeds(KmppAlg(), costs, k) # K-medoids initialization based on centrality @@ -157,7 +161,9 @@ kmpp_by_costs(costs::AbstractMatrix{T}, k::Int) where T<:AbstractFloat = mutable struct KmCentralityAlg <: SeedingAlgorithm end -function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg, costs::AbstractMatrix{T}) where T<:AbstractFloat +function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg, + costs::AbstractMatrix{<:Real}) + n = size(costs, 1) k = length(iseeds) k <= n || error("Attempted to select more seeds than samples.") @@ -182,8 +188,8 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmCentralityAlg, costs: return iseeds end -initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::AbstractMatrix{T}, metric::PreMetric) where T<:AbstractFloat = +initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::AbstractMatrix{<:Real}, metric::PreMetric) = initseeds_by_costs!(iseeds, alg, pairwise(metric, X)) -initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::AbstractMatrix{T}) where T<:AbstractFloat = +initseeds!(iseeds::IntegerVector, alg::KmCentralityAlg, X::AbstractMatrix{<:Real}) = initseeds!(iseeds, alg, X, SqEuclidean()) diff --git a/test/kmeans.jl b/test/kmeans.jl index 23af7bb6..bd6b0c90 100644 --- a/test/kmeans.jl +++ b/test/kmeans.jl @@ -3,6 +3,7 @@ using Test using Clustering using Distances +using Random using LinearAlgebra import Distances.pairwise! @@ -30,7 +31,11 @@ k = 10 x = rand(m, n) xt = copy(transpose(x)) +equal_kmresults(km1::KmeansResult, km2::KmeansResult) = + all(getfield(km1, η) == getfield(km2, η) for η ∈ fieldnames(KmeansResult)) + @testset "non-weighted" begin + Random.seed!(34568) r = kmeans(x, k; maxiter=50) @test isa(r, KmeansResult{Float64}) @test size(r.centers) == (m, k) @@ -41,23 +46,17 @@ xt = copy(transpose(x)) @test sum(r.counts) == n @test r.cweights == map(Float64, r.counts) @test sum(r.costs) ≈ r.totalcost -end -@testset "non-weighted^T" begin - r = kmeans(xt', k; maxiter=50) - @test isa(r, KmeansResult{Float64}) - @test size(r.centers) == (m, k) - @test length(r.assignments) == n - @test all(a -> 1 <= a <= k, r.assignments) - @test length(r.costs) == n - @test length(r.counts) == k - @test sum(r.counts) == n - @test r.cweights == map(Float64, r.counts) - @test sum(r.costs) ≈ r.totalcost + Random.seed!(34568) + r_t = kmeans(xt', k; maxiter=50) + @test equal_kmresults(r, r_t) end @testset "non-weighted (float32)" begin - r = kmeans(map(Float32, x), k; maxiter=50) + Random.seed!(34568) + x32 = map(Float32, x) + x32t = copy(x32') + r = kmeans(x32, k; maxiter=50) @test isa(r, KmeansResult{Float32}) @test size(r.centers) == (m, k) @test length(r.assignments) == n @@ -67,10 +66,15 @@ end @test sum(r.counts) == n @test r.cweights == map(Float64, r.counts) @test sum(r.costs) ≈ r.totalcost + + Random.seed!(34568) + r_t = kmeans(x32t', k; maxiter=50) + @test equal_kmresults(r, r_t) end @testset "weighted" begin w = rand(n) + Random.seed!(34568) r = kmeans(x, k; maxiter=50, weights=w) @test isa(r, KmeansResult{Float64}) @test size(r.centers) == (m, k) @@ -86,28 +90,14 @@ end end @test r.cweights ≈ cw @test dot(r.costs, w) ≈ r.totalcost -end - -@testset "weighted^T" begin - w = rand(n) - r = kmeans(xt', k; maxiter=50, weights=w) - @test isa(r, KmeansResult{Float64}) - @test size(r.centers) == (m, k) - @test length(r.assignments) == n - @test all(a -> 1 <= a <= k, r.assignments) - @test length(r.costs) == n - @test length(r.counts) == k - @test sum(r.counts) == n - cw = zeros(k) - for i = 1:n - cw[r.assignments[i]] += w[i] - end - @test r.cweights ≈ cw - @test dot(r.costs, w) ≈ r.totalcost + Random.seed!(34568) + r_t = kmeans(xt', k; maxiter=50, weights=w) + @test equal_kmresults(r, r_t) end @testset "custom distance" begin + Random.seed!(34568) r = kmeans(x, k; maxiter=50, init=:kmcen, distance=MySqEuclidean()) r2 = kmeans(x, k; maxiter=50, init=:kmcen) @test isa(r, KmeansResult{Float64}) @@ -119,26 +109,11 @@ end @test sum(r.counts) == n @test r.cweights == map(Float64, r.counts) @test sum(r.costs) ≈ r.totalcost - for fn in fieldnames(typeof(r)) - @test getfield(r, fn) == getfield(r2, fn) - end -end + @test equal_kmresults(r, r2) -@testset "custom distance^T" begin - r = kmeans(xt', k; maxiter=50, init=:kmcen, distance=MySqEuclidean()) - r2 = kmeans(xt', k; maxiter=50, init=:kmcen) - @test isa(r, KmeansResult{Float64}) - @test size(r.centers) == (m, k) - @test length(r.assignments) == n - @test all(a -> 1 <= a <= k, r.assignments) - @test length(r.costs) == n - @test length(r.counts) == k - @test sum(r.counts) == n - @test r.cweights == map(Float64, r.counts) - @test sum(r.costs) ≈ r.totalcost - for fn in fieldnames(typeof(r)) - @test getfield(r, fn) == getfield(r2, fn) - end + Random.seed!(34568) + r_t = kmeans(xt', k; maxiter=50, init=:kmcen, distance=MySqEuclidean()) + @test equal_kmresults(r, r_t) end end diff --git a/test/seeding.jl b/test/seeding.jl index 6eaf9ec6..5f5074e7 100644 --- a/test/seeding.jl +++ b/test/seeding.jl @@ -37,94 +37,84 @@ Ct = copy(transpose(C)) md0 = min_interdist(X) @testset "RandSeed" begin + Random.seed!(34568) iseeds = initseeds(RandSeedAlg(), X, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = initseeds(RandSeedAlg(), Xt', k) + @test iseeds == iseeds_t + Random.seed!(34568) iseeds = initseeds_by_costs(RandSeedAlg(), C, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = initseeds_by_costs(RandSeedAlg(), Ct', k) + @test iseeds == iseeds_t R = copyseeds(X, iseeds) @test isa(R, Matrix{Float64}) @test R == X[:, iseeds] -end - -@testset "RandSeed^T" begin - iseeds = initseeds(RandSeedAlg(), Xt', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - - iseeds = initseeds_by_costs(RandSeedAlg(), Ct', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - - R = copyseeds(Xt', iseeds) - @test isa(R, Matrix{Float64}) - @test R == (Xt')[:, iseeds] + R_t = copyseeds(Xt', iseeds) + @test R == R_t end @testset "Kmpp" begin + Random.seed!(34568) iseeds = initseeds(KmppAlg(), X, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = initseeds(KmppAlg(), Xt', k) + @test iseeds == iseeds_t + Random.seed!(34568) iseeds = initseeds_by_costs(KmppAlg(), C, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = initseeds_by_costs(KmppAlg(), Ct', k) + @test iseeds == iseeds_t @test min_interdist(X[:, iseeds]) > 20 * md0 + @test min_interdist((Xt')[:, iseeds]) > 20 * md0 + Random.seed!(34568) iseeds = kmpp(X, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = kmpp(Xt', k) + @test iseeds_t == iseeds + Random.seed!(34568) iseeds = kmpp_by_costs(C, k) @test length(iseeds) == k @test alldistinct(iseeds) -end - -@testset "Kmpp^T" begin - iseeds = initseeds(KmppAlg(), Xt', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - - iseeds = initseeds_by_costs(KmppAlg(), Ct', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - - @test min_interdist((Xt')[:, iseeds]) > 20 * md0 - - iseeds = kmpp(Xt', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - - iseeds = kmpp_by_costs(Ct', k) - @test length(iseeds) == k - @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = kmpp_by_costs(Ct', k) + @test iseeds_t == iseeds end @testset "Kmcentrality" begin + Random.seed!(34568) iseeds = initseeds(KmCentralityAlg(), X, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = initseeds(KmCentralityAlg(), Xt', k) + @test iseeds == iseeds_t + Random.seed!(34568) iseeds = initseeds_by_costs(KmCentralityAlg(), C, k) @test length(iseeds) == k @test alldistinct(iseeds) + Random.seed!(34568) + iseeds_t = initseeds_by_costs(KmCentralityAlg(), Ct', k) + @test iseeds == iseeds_t @test min_interdist(X[:, iseeds]) > 2 * md0 -end - -@testset "Kmcentrality^T" begin - iseeds = initseeds(KmCentralityAlg(), Xt', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - - iseeds = initseeds_by_costs(KmCentralityAlg(), Ct', k) - @test length(iseeds) == k - @test alldistinct(iseeds) - @test min_interdist((Xt')[:, iseeds]) > 2 * md0 end From 2e95afb1fe88d5f1dc14c4886ee3318993176795 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 28 Jan 2019 11:40:15 +1100 Subject: [PATCH 03/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 1c7ab196..ede2c3f5 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -2,7 +2,7 @@ #### Interface -mutable struct KmeansResult{T<:AbstractFloat} <: ClusteringResult +mutable struct KmeansResult{T<:Real} <: ClusteringResult centers::Matrix{T} # cluster centers (d x k) assignments::Vector{Int} # assignments (n) costs::Vector{Float64} # costs of the resultant assignments (n) From be3575d7ba34fcfea4905e1494a60fdd5a9e96e1 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 28 Jan 2019 11:40:29 +1100 Subject: [PATCH 04/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index ede2c3f5..230ca681 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -243,7 +243,8 @@ function update_centers!( centers::AbstractMatrix{<:Real}, # out: updated centers (d x k) cweights::Vector{Float64}) # out: updated cluster weights (k) - (d, n), k = size(X), size(centers, 2) + d, n = size(X) + k = size(centers, 2) # initialize center weights cweights[to_update] .= 0.0 From d3f167aea49f07a2869cdeda0166edfcc50d48c3 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 28 Jan 2019 11:41:49 +1100 Subject: [PATCH 05/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 230ca681..a160fbd8 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -256,7 +256,7 @@ function update_centers!( if to_update[cj] if cweights[cj] > 0 - for i = 1:d + centers[:, cj] .+= view(X, :, j) centers[i, cj] += X[i, j] end else From a8ea8c78de6d486ce7fad665a57d2d27630ebcac Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 28 Jan 2019 11:41:58 +1100 Subject: [PATCH 06/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index a160fbd8..009c3213 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -260,7 +260,7 @@ function update_centers!( centers[i, cj] += X[i, j] end else - for i = 1:d + centers[:, cj] .= view(X, :, j) centers[i, cj] = X[i, j] end end From c5bda5b76a9fcc44979cc602616380fa093a540f Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 28 Jan 2019 11:42:58 +1100 Subject: [PATCH 07/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 009c3213..a5673566 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -271,7 +271,7 @@ function update_centers!( # sum ==> mean @inbounds for j = 1:k if to_update[j] - cj = 1.0 / cweights[j] + centers[:, j] .*= 1.0 / cweights[j] vj = view(centers, :, j) for i = 1:d vj[i] *= cj From 3f8b15bc778d7849783c3b93f3f8694002e27b62 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 28 Jan 2019 11:44:01 +1100 Subject: [PATCH 08/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index a5673566..63f98191 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -293,7 +293,8 @@ function update_centers!( centers::AbstractMatrix{<:Real}, # out: updated centers (d x k) cweights::Vector{Float64}) # out: updated cluster weights (k) - (d, n), k = size(X), size(centers, 2) + d, n = size(X) + k = size(centers, 2) # initialize center weights cweights[to_update] .= 0.0 From 0d272e95dc2e1f07180d001ea1b2d9499d2fbb63 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Mon, 28 Jan 2019 12:46:41 +1100 Subject: [PATCH 09/13] applying the changes following PR reviews --- src/kmeans.jl | 66 +++++++++++++++++++++----------------------------- src/seeding.jl | 10 +++----- test/kmeans.jl | 2 -- 3 files changed, 31 insertions(+), 47 deletions(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 63f98191..d8cd8387 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -34,7 +34,7 @@ function kmeans!(X::AbstractMatrix{<:Real}, centers::AbstractMatrix{<:Real}; counts = Vector{Int}(undef, k) cweights = Vector{Float64}(undef, k) - _kmeans!(X, conv_weights(Float64, n, weights), centers, assignments, + _kmeans!(X, conv_weights(eltype(X), n, weights), centers, assignments, costs, counts, cweights, maxiter, tol, display_level(display), distance) end @@ -62,9 +62,9 @@ end # core k-means skeleton function _kmeans!( - X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) w::Union{Nothing, AbstractVector{<:Real}}, # in: sample weights (n) - centers::AbstractMatrix{<:Real}, # in/out: matrix of centers (d x k) + centers::AbstractMatrix{<:Real}, # in/out: matrix of centers (d x k) assignments::Vector{Int}, # out: vector of assignments (n) costs::Vector{Float64}, # out: costs of the resultant assignments (n) counts::Vector{Int}, # out: # samples assigned to each cluster (k) @@ -82,10 +82,8 @@ function _kmeans!( num_affected::Int = k # number of centers, to which the distances need to be recomputed dmat = pairwise(distance, centers, X) - update_assignments!(dmat, true, assignments, costs, counts, - to_update, unused) - - objv = (w === nothing) ? sum(costs) : dot(w, costs) + update_assignments!(dmat, true, assignments, costs, counts, to_update, unused) + objv = w === nothing ? sum(costs) : dot(w, costs) # main loop t = 0 @@ -122,14 +120,13 @@ function _kmeans!( # update assignments - update_assignments!(dmat, false, assignments, costs, counts, - to_update, unused) + update_assignments!(dmat, false, assignments, costs, counts, to_update, unused) num_affected = sum(to_update) + length(unused) # compute change of objective and determine convergence prev_objv = objv - objv = (w === nothing) ? sum(costs) : dot(w, costs) + objv = w === nothing ? sum(costs) : dot(w, costs) objv_change = objv - prev_objv if objv_change > tol @@ -165,13 +162,13 @@ end # an updated (squared) distance matrix # function update_assignments!( - dmat::Matrix{<:Real}, # in: distance matrix (k x n) - is_init::Bool, # in: whether it is the initial run - assignments::Vector{Int}, # out: assignment vector (n) - costs::Vector{Float64}, # out: costs of the resultant assignment (n) - counts::Vector{Int}, # out: # samples assigned to each cluster (k) - to_update::Vector{Bool}, # out: whether a center needs update (k) - unused::Vector{Int}) # out: list of centers with no samples + dmat::AbstractMatrix{<:Real}, # in: distance matrix (k x n) + is_init::Bool, # in: whether it is the initial run + assignments::Vector{Int}, # out: assignment vector (n) + costs::Vector{Float64}, # out: costs of the resultant assignment (n) + counts::Vector{Int}, # out: # samples assigned to each cluster (k) + to_update::Vector{Bool}, # out: whether a center needs update (k) + unused::Vector{Int}) # out: list of centers with no samples k, n = size(dmat) @@ -255,14 +252,11 @@ function update_centers!( 1 <= cj <= k || error("assignment out of boundary.") if to_update[cj] + xj = view(X, :, j) if cweights[cj] > 0 - centers[:, cj] .+= view(X, :, j) - centers[i, cj] += X[i, j] - end + centers[:, cj] .+= xj else - centers[:, cj] .= view(X, :, j) - centers[i, cj] = X[i, j] - end + centers[:, cj] .= xj end cweights[cj] += 1 end @@ -271,11 +265,7 @@ function update_centers!( # sum ==> mean @inbounds for j = 1:k if to_update[j] - centers[:, j] .*= 1.0 / cweights[j] - vj = view(centers, :, j) - for i = 1:d - vj[i] *= cj - end + centers[:, j] .*= 1 / cweights[j] end end end @@ -300,20 +290,18 @@ function update_centers!( cweights[to_update] .= 0.0 # accumulate columns - # accumulate_cols_u!(centers, cweights, x, assignments, weights, to_update) - for j = 1:n - @inbounds wj = weights[j] + @inbounds for j = 1:n + wj = weights[j] if wj > 0 - @inbounds cj = assignments[j] + cj = assignments[j] 1 <= cj <= k || error("assignment out of boundary.") if to_update[cj] - rj = view(centers, :, cj) xj = view(X, :, j) if cweights[cj] > 0 - @inbounds rj .+= xj * wj + centers[:, cj] .+= xj * wj else - rj .= xj * wj + centers[:, cj] .= xj * wj end cweights[cj] += wj end @@ -321,9 +309,9 @@ function update_centers!( end # sum ==> mean - for j = 1:k + @inbounds for j = 1:k if to_update[j] - @inbounds centers[:, j] .*= 1.0 / cweights[j] + centers[:, j] .*= 1 / cweights[j] end end end @@ -336,7 +324,7 @@ function repick_unused_centers( X::AbstractMatrix{<:Real}, # in: the sample set (d x n) costs::Vector{Float64}, # in: the current assignment costs (n) centers::AbstractMatrix{<:Real}, # to be updated: the centers (d x k) - unused::Vector{Int}) # in: set of indices of centers to be updated + unused::Vector{Int}) # in: set of indices of centers to be updated # pick new centers using a scheme like kmeans++ ds = similar(costs) @@ -346,7 +334,7 @@ function repick_unused_centers( for i in unused j = wsample(1:n, tcosts) tcosts[j] = 0 - v = X[:, j] + v = view(X, :, j) centers[:, i] = v colwise!(ds, SqEuclidean(), v, X) diff --git a/src/seeding.jl b/src/seeding.jl index 38334740..326c1e81 100644 --- a/src/seeding.jl +++ b/src/seeding.jl @@ -40,8 +40,8 @@ initseeds_by_costs(algname::Symbol, costs::AbstractMatrix{<:Real}, k::Integer) = initseeds(iseeds::Vector{Int}, X::AbstractMatrix{<:Real}, k::Integer) = iseeds initseeds_by_costs(iseeds::Vector{Int}, costs::AbstractMatrix{<:Real}, k::Integer) = iseeds -function copyseeds!(S::DenseMatrix, X::AbstractMatrix{<:Real}, - iseeds::AbstractVector) +function copyseeds!(S::AbstractMatrix{T}, X::AbstractMatrix{T}, + iseeds::AbstractVector) where T<:Real d = size(X, 1) n = size(X, 2) k = length(iseeds) @@ -55,7 +55,7 @@ function copyseeds!(S::DenseMatrix, X::AbstractMatrix{<:Real}, end copyseeds(X::AbstractMatrix{<:Real}, iseeds::AbstractVector) = - copyseeds!(Matrix{eltype(X)}(undef, size(X,1), length(iseeds)), X, iseeds) + copyseeds!(similar(X, size(X, 1), length(iseeds)), X, iseeds) function check_seeding_args(n::Integer, k::Integer) k >= 1 || error("The number of seeds must be positive.") @@ -86,7 +86,6 @@ mutable struct KmppAlg <: SeedingAlgorithm end function initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{<:Real}, metric::PreMetric) - n = size(X, 2) k = length(iseeds) check_seeding_args(n, k) @@ -121,7 +120,6 @@ initseeds!(iseeds::IntegerVector, alg::KmppAlg, X::AbstractMatrix{<:Real}) = function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, costs::AbstractMatrix{<:Real}) - n = size(costs, 1) k = length(iseeds) check_seeding_args(n, k) @@ -131,7 +129,7 @@ function initseeds_by_costs!(iseeds::IntegerVector, alg::KmppAlg, iseeds[1] = p if k > 1 - mincosts = copy(view(costs, :, p)) + mincosts = costs[:, p] mincosts[p] = 0 # pick remaining (with a chance proportional to mincosts) diff --git a/test/kmeans.jl b/test/kmeans.jl index bd6b0c90..4fc26fec 100644 --- a/test/kmeans.jl +++ b/test/kmeans.jl @@ -1,5 +1,3 @@ -# simple program to test the new k-means (not ready yet) - using Test using Clustering using Distances From 9cb02f7a9a54a170e1f9034ebe5e4e4b0752e4d3 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Mon, 28 Jan 2019 13:16:52 +1100 Subject: [PATCH 10/13] reverting a few changes that seemed to cause spurious allocations --- src/kmeans.jl | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index d8cd8387..37819e38 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -250,13 +250,15 @@ function update_centers!( @inbounds for j = 1:n cj = assignments[j] 1 <= cj <= k || error("assignment out of boundary.") - if to_update[cj] - xj = view(X, :, j) if cweights[cj] > 0 - centers[:, cj] .+= xj + for i = 1:d + centers[i, cj] += X[i, j] + end else - centers[:, cj] .= xj + for i = 1:d + centers[i, cj] = X[i, j] + end end cweights[cj] += 1 end @@ -265,7 +267,9 @@ function update_centers!( # sum ==> mean @inbounds for j = 1:k if to_update[j] - centers[:, j] .*= 1 / cweights[j] + for i = 1:d + centers[i, j] /= cweights[j] + end end end end @@ -295,13 +299,15 @@ function update_centers!( if wj > 0 cj = assignments[j] 1 <= cj <= k || error("assignment out of boundary.") - if to_update[cj] - xj = view(X, :, j) if cweights[cj] > 0 - centers[:, cj] .+= xj * wj + for i = 1:d + centers[i, cj] += X[i, j] * wj + end else - centers[:, cj] .= xj * wj + for i = 1:d + centers[i, cj] = X[i, j] * wj + end end cweights[cj] += wj end @@ -311,7 +317,9 @@ function update_centers!( # sum ==> mean @inbounds for j = 1:k if to_update[j] - centers[:, j] .*= 1 / cweights[j] + for i = 1:d + centers[i, j] /= cweights[j] + end end end end From 16ae803268bb7ecf38ea0537495acce3f1043d92 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Mon, 28 Jan 2019 20:24:20 +1100 Subject: [PATCH 11/13] type changes, tying costs and centers to the type of X, similar to what was in place before but I believe more consistent. Benchmark results identical. --- src/kmeans.jl | 54 ++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 37819e38..03fc57ed 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -5,7 +5,7 @@ mutable struct KmeansResult{T<:Real} <: ClusteringResult centers::Matrix{T} # cluster centers (d x k) assignments::Vector{Int} # assignments (n) - costs::Vector{Float64} # costs of the resultant assignments (n) + costs::Vector{T} # costs of the resultant assignments (n) counts::Vector{Int} # number of samples assigned to each cluster (k) cweights::Vector{Float64} # cluster weights (k) totalcost::Float64 # total cost (i.e. objective) (k) @@ -18,11 +18,12 @@ const _kmeans_default_maxiter = 100 const _kmeans_default_tol = 1.0e-6 const _kmeans_default_display = :none -function kmeans!(X::AbstractMatrix{<:Real}, centers::AbstractMatrix{<:Real}; - weights=nothing, maxiter::Integer=_kmeans_default_maxiter, +function kmeans!(X::AbstractMatrix{T}, centers::AbstractMatrix{T}; + weights::Union{Nothing, Vector{<:Real}}=nothing, + maxiter::Integer=_kmeans_default_maxiter, tol::Real=_kmeans_default_tol, display::Symbol=_kmeans_default_display, - distance::SemiMetric=SqEuclidean()) + distance::SemiMetric=SqEuclidean()) where T<:Real m, n = size(X) m2, k = size(centers) @@ -30,12 +31,12 @@ function kmeans!(X::AbstractMatrix{<:Real}, centers::AbstractMatrix{<:Real}; (2 <= k < n) || error("k must have 2 <= k < n.") assignments = Vector{Int}(undef, n) - costs = Vector{Float64}(undef, n) + costs = Vector{T}(undef, n) counts = Vector{Int}(undef, k) cweights = Vector{Float64}(undef, k) - _kmeans!(X, conv_weights(eltype(X), n, weights), centers, assignments, - costs, counts, cweights, maxiter, tol, + _kmeans!(X, conv_weights(T, n, weights), centers, + assignments, costs, counts, cweights, maxiter, tol, display_level(display), distance) end @@ -62,18 +63,18 @@ end # core k-means skeleton function _kmeans!( - X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + X::AbstractMatrix{T}, # in: sample matrix (d x n) w::Union{Nothing, AbstractVector{<:Real}}, # in: sample weights (n) - centers::AbstractMatrix{<:Real}, # in/out: matrix of centers (d x k) + centers::AbstractMatrix{T}, # in/out: matrix of centers (d x k) assignments::Vector{Int}, # out: vector of assignments (n) - costs::Vector{Float64}, # out: costs of the resultant assignments (n) + costs::Vector{T}, # out: costs of the resultant assignments (n) counts::Vector{Int}, # out: # samples assigned to each cluster (k) cweights::Vector{Float64}, # out: weights of each cluster maxiter::Integer, # in: maximum number of iterations tol::Real, # in: tolerance of change at convergence displevel::Int, # in: the level of display distance::SemiMetric # in: function to calculate the distance with - ) + ) where T<:Real # initialize k = size(centers, 2) @@ -82,6 +83,7 @@ function _kmeans!( num_affected::Int = k # number of centers, to which the distances need to be recomputed dmat = pairwise(distance, centers, X) + dmat = convert(Array{T}, dmat) # Can be removed if one day Distance.result_type(SqEuclidean(), T, T) == T update_assignments!(dmat, true, assignments, costs, counts, to_update, unused) objv = w === nothing ? sum(costs) : dot(w, costs) @@ -162,13 +164,14 @@ end # an updated (squared) distance matrix # function update_assignments!( - dmat::AbstractMatrix{<:Real}, # in: distance matrix (k x n) + dmat::Matrix{T}, # in: distance matrix (k x n) is_init::Bool, # in: whether it is the initial run assignments::Vector{Int}, # out: assignment vector (n) - costs::Vector{Float64}, # out: costs of the resultant assignment (n) + costs::Vector{T}, # out: costs of the resultant assignment (n) counts::Vector{Int}, # out: # samples assigned to each cluster (k) to_update::Vector{Bool}, # out: whether a center needs update (k) - unused::Vector{Int}) # out: list of centers with no samples + unused::Vector{Int} # out: list of centers with no samples + ) where T<:Real k, n = size(dmat) @@ -233,12 +236,13 @@ end # (specific to the case where samples are not weighted) # function update_centers!( - X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + X::AbstractMatrix{T}, # in: sample matrix (d x n) w::Nothing, # in: sample weights assignments::Vector{Int}, # in: assignments (n) to_update::Vector{Bool}, # in: whether a center needs update (k) - centers::AbstractMatrix{<:Real}, # out: updated centers (d x k) - cweights::Vector{Float64}) # out: updated cluster weights (k) + centers::AbstractMatrix{T}, # out: updated centers (d x k) + cweights::Vector{Float64} # out: updated cluster weights (k) + ) where T<:Real d, n = size(X) k = size(centers, 2) @@ -280,12 +284,13 @@ end # (specific to the case where samples are weighted) # function update_centers!( - X::AbstractMatrix{<:Real}, # in: sample matrix (d x n) + X::AbstractMatrix{T}, # in: sample matrix (d x n) weights::AbstractVector{<:Real}, # in: sample weights (n) assignments::Vector{Int}, # in: assignments (n) to_update::Vector{Bool}, # in: whether a center needs update (k) - centers::AbstractMatrix{<:Real}, # out: updated centers (d x k) - cweights::Vector{Float64}) # out: updated cluster weights (k) + centers::AbstractMatrix{T}, # out: updated centers (d x k) + cweights::Vector{Float64} # out: updated cluster weights (k) + ) where T<:Real d, n = size(X) k = size(centers, 2) @@ -329,10 +334,11 @@ end # Re-picks centers that get no samples assigned to them. # function repick_unused_centers( - X::AbstractMatrix{<:Real}, # in: the sample set (d x n) - costs::Vector{Float64}, # in: the current assignment costs (n) - centers::AbstractMatrix{<:Real}, # to be updated: the centers (d x k) - unused::Vector{Int}) # in: set of indices of centers to be updated + X::AbstractMatrix{T}, # in: the sample set (d x n) + costs::Vector{T}, # in: the current assignment costs (n) + centers::AbstractMatrix{T}, # to be updated: the centers (d x k) + unused::Vector{Int} # in: set of indices of centers to be updated + ) where T<:Real # pick new centers using a scheme like kmeans++ ds = similar(costs) From 192694a53e6499d4a8243ab8dcb470ce6f1f96e8 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 29 Jan 2019 11:15:19 +1100 Subject: [PATCH 12/13] Update src/kmeans.jl Co-Authored-By: tlienart --- src/kmeans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 03fc57ed..3b75dee0 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -19,7 +19,7 @@ const _kmeans_default_tol = 1.0e-6 const _kmeans_default_display = :none function kmeans!(X::AbstractMatrix{T}, centers::AbstractMatrix{T}; - weights::Union{Nothing, Vector{<:Real}}=nothing, + weights::Union{Nothing, AbstractVector{<:Real}}=nothing, maxiter::Integer=_kmeans_default_maxiter, tol::Real=_kmeans_default_tol, display::Symbol=_kmeans_default_display, From 603422db1a18348c85ac9caa5b3ad4597781050d Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Tue, 29 Jan 2019 23:54:43 +1100 Subject: [PATCH 13/13] reverting the rounding of maxiter as per requested change --- src/kmeans.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmeans.jl b/src/kmeans.jl index 3b75dee0..f247ad67 100644 --- a/src/kmeans.jl +++ b/src/kmeans.jl @@ -36,7 +36,7 @@ function kmeans!(X::AbstractMatrix{T}, centers::AbstractMatrix{T}; cweights = Vector{Float64}(undef, k) _kmeans!(X, conv_weights(T, n, weights), centers, - assignments, costs, counts, cweights, maxiter, tol, + assignments, costs, counts, cweights, round(Int, maxiter), tol, display_level(display), distance) end