From caef5c3c0413ec68bf8e71235bd15dd55a434f57 Mon Sep 17 00:00:00 2001 From: Madeleine Udell Date: Tue, 23 Jul 2019 17:00:31 -0400 Subject: [PATCH] eliminate degenerate ordinal variables from hello_world.jl test --- src/losses.jl | 11 ++++------- src/sample.jl | 2 +- test/hello_world.jl | 15 +++++++++------ todo.md | 8 ++++++++ 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/losses.jl b/src/losses.jl index f3eaf3d..c134fcd 100644 --- a/src/losses.jl +++ b/src/losses.jl @@ -465,14 +465,14 @@ mutable struct BvSLoss<:Loss scale::Float64 domain::Domain end -BvSLoss(m::Integer, scale::Float64=1.0; domain=OrdinalDomain(1,m), bin_loss::Loss=LogisticLoss(scale)) = BvSLoss(m,bin_loss,scale,domain) +function BvSLoss(m::Integer, scale::Float64=1.0; domain=OrdinalDomain(1,m), bin_loss::Loss=LogisticLoss(scale)) + @assert(m >= 2, error("Number of levels of ordinal variable must be at least 2; got $m.")) + BvSLoss(m,bin_loss,scale,domain) +end BvSLoss() = BvSLoss(10) # for copying correctly embedding_dim(l::BvSLoss) = l.max-1 datalevels(l::BvSLoss) = 1:l.max # levels are encoded as the numbers 1:l.max -# in Julia v0.4, argument u is a row vector (row slice of a matrix), which in julia is 2d -# function evaluate(l::BvSLoss, u::Array{Float64,2}, a::Int) -# this breaks compatibility with v0.4 function evaluate(l::BvSLoss, u::Array{Float64,1}, a::Int) loss = 0 for j in 1:length(u) @@ -481,9 +481,6 @@ function evaluate(l::BvSLoss, u::Array{Float64,1}, a::Int) return l.scale*loss end -# in Julia v0.4, argument u is a row vector (row slice of a matrix), which in julia is 2d -# function grad(l::BvSLoss, u::Array{Float64,2}, a::Int) -# this breaks compatibility with v0.4 function grad(l::BvSLoss, u::Array{Float64,1}, a::Int) g = zeros(length(u)) for j in 1:length(u) diff --git a/src/sample.jl b/src/sample.jl index 89e7820..08770fe 100644 --- a/src/sample.jl +++ b/src/sample.jl @@ -133,7 +133,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true # make sure we don't mutate the type of the array A # even if all data for some real loss take integer values for j=1:n - if isa(domains[j], RealDomain) && isa(glrm.A[j], DataArray{Int64,1}) + if isa(domains[j], RealDomain) && isa(glrm.A[:,j], Array{Union{Missing, Int},1}) domains[j] = OrdinalDomain(minimum(dropmissing(glrm.A[j])), maximum(dropmissing(glrm.A[j]))) end end diff --git a/test/hello_world.jl b/test/hello_world.jl index eb68de7..46e7c0f 100644 --- a/test/hello_world.jl +++ b/test/hello_world.jl @@ -1,4 +1,5 @@ using LowRankModels, DataFrames, Random, SparseArrays +Random.seed!(0) # loss types to test real_loss_types = [QuadLoss, HuberLoss] @@ -27,6 +28,8 @@ end # regularizers to test regularizers = [QuadReg(), OneReg(5), NonNegConstraint(), KSparseConstraint(2)] +# add more regularizers = more rows so the data isn't degenerate +regularizers = cat(regularizers, fill(QuadReg(), 10), dims=1) m,n = length(regularizers), length(losses) @@ -39,7 +42,7 @@ A_cat = rand(1:3, m, length(categorical_losses)) A = Any[A_real A_bool A_ord A_cat] glrm = GLRM(A, losses, regularizers, QuadReg(), 2) -fit!(glrm) +fit!(glrm, verbose=false) println("successfully fit matrix") ### now fit data frame @@ -48,18 +51,18 @@ df = NaNs_to_Missing!(DataFrame(Array(0 ./ A_sparse + A_sparse))) # explicitly encoding missing obs = observations(df) glrm = GLRM(df, QuadLoss(), QuadReg(), QuadReg(), 2, obs=obs) -fit!(glrm) +fit!(glrm, verbose=false) # implicitly encoding missings from dataframe - this functionality has not been implemented for dataframes # glrm = GLRM(df, QuadLoss(), QuadReg(), QuadReg(), 2) -# fit!(glrm) +# fit!(glrm, verbose=false) # without specifying losses directly glrm = GLRM(DataFrame(A), 3, data_types) -fit!(glrm) +fit!(glrm, verbose=false) println("successfully fit dataframe") -# imputation and sampling +### imputation and sampling impute(glrm) println("successfully imputed entries") sample(glrm) @@ -71,5 +74,5 @@ println("successfully sampled from model") m, n = 10, 10 sparseA = sprandn(m, n, .5) glrm = GLRM(A, QuadLoss(), QuadReg(), QuadReg(), 5) -fit!(glrm) +fit!(glrm, verbose=false) println("successfully fit sparse GLRM") diff --git a/todo.md b/todo.md index 327ee08..75a8b68 100644 --- a/todo.md +++ b/todo.md @@ -26,3 +26,11 @@ * sample doesn't work * lots of bugs in fit_dataframe_w_type_imputation; deprecated for now. (also it's an odd thing to do.) * imputation doesn't return correct type (for dataframes) + +# How to register/publish a new version of the package + +1. update version number in Project.toml +2. navigate to commit that you want tagged on github +3. comment @Registrator register +4. monitor resulting PR on the general registry to see if any bugs are found +5. when PR is accepted, use Tagger to make github release