From caef5c3c0413ec68bf8e71235bd15dd55a434f57 Mon Sep 17 00:00:00 2001
From: Madeleine Udell <madeleine.udell@gmail.com>
Date: Tue, 23 Jul 2019 17:00:31 -0400
Subject: [PATCH] eliminate degenerate ordinal variables from hello_world.jl
 test

---
 src/losses.jl       | 11 ++++-------
 src/sample.jl       |  2 +-
 test/hello_world.jl | 15 +++++++++------
 todo.md             |  8 ++++++++
 4 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/losses.jl b/src/losses.jl
index f3eaf3d..c134fcd 100644
--- a/src/losses.jl
+++ b/src/losses.jl
@@ -465,14 +465,14 @@ mutable struct BvSLoss<:Loss
     scale::Float64
     domain::Domain
 end
-BvSLoss(m::Integer, scale::Float64=1.0; domain=OrdinalDomain(1,m), bin_loss::Loss=LogisticLoss(scale)) = BvSLoss(m,bin_loss,scale,domain)
+function BvSLoss(m::Integer, scale::Float64=1.0; domain=OrdinalDomain(1,m), bin_loss::Loss=LogisticLoss(scale))
+  m >= 2 || error("Number of levels of ordinal variable must be at least 2; got $m.") # explicit check: @assert may be disabled
+  return BvSLoss(m,bin_loss,scale,domain)
+end
 BvSLoss() = BvSLoss(10) # for copying correctly
 embedding_dim(l::BvSLoss) = l.max-1
 datalevels(l::BvSLoss) = 1:l.max # levels are encoded as the numbers 1:l.max
 
-# in Julia v0.4, argument u is a row vector (row slice of a matrix), which in julia is 2d
-# function evaluate(l::BvSLoss, u::Array{Float64,2}, a::Int)
-# this breaks compatibility with v0.4
 function evaluate(l::BvSLoss, u::Array{Float64,1}, a::Int)
     loss = 0
     for j in 1:length(u)
@@ -481,9 +481,6 @@ function evaluate(l::BvSLoss, u::Array{Float64,1}, a::Int)
     return l.scale*loss
 end
 
-# in Julia v0.4, argument u is a row vector (row slice of a matrix), which in julia is 2d
-# function grad(l::BvSLoss, u::Array{Float64,2}, a::Int)
-# this breaks compatibility with v0.4
 function grad(l::BvSLoss, u::Array{Float64,1}, a::Int)
   g = zeros(length(u))
   for j in 1:length(u)
diff --git a/src/sample.jl b/src/sample.jl
index 89e7820..08770fe 100644
--- a/src/sample.jl
+++ b/src/sample.jl
@@ -133,7 +133,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true
 	# make sure we don't mutate the type of the array A
 	# even if all data for some real loss take integer values
 	for j=1:n
-		if isa(domains[j], RealDomain) && isa(glrm.A[j], DataArray{Int64,1})
+		if isa(domains[j], RealDomain) && isa(glrm.A[:,j], Array{Union{Missing, Int},1})
 			domains[j] = OrdinalDomain(minimum(dropmissing(glrm.A[j])), maximum(dropmissing(glrm.A[j])))
 		end
 	end
diff --git a/test/hello_world.jl b/test/hello_world.jl
index eb68de7..46e7c0f 100644
--- a/test/hello_world.jl
+++ b/test/hello_world.jl
@@ -1,4 +1,5 @@
 using LowRankModels, DataFrames, Random, SparseArrays
+Random.seed!(0)
 
 # loss types to test
 real_loss_types = [QuadLoss, HuberLoss]
@@ -27,6 +28,8 @@ end
 
 # regularizers to test
 regularizers = [QuadReg(), OneReg(5), NonNegConstraint(), KSparseConstraint(2)]
+# pad with extra regularizers (one per row of A) so there are enough rows that the data isn't degenerate
+regularizers = cat(regularizers, fill(QuadReg(), 10), dims=1)
 
 m,n = length(regularizers), length(losses)
 
@@ -39,7 +42,7 @@ A_cat = rand(1:3, m, length(categorical_losses))
 A = Any[A_real A_bool A_ord A_cat]
 
 glrm = GLRM(A, losses, regularizers, QuadReg(), 2)
-fit!(glrm)
+fit!(glrm, verbose=false)
 println("successfully fit matrix")
 
 ### now fit data frame
@@ -48,18 +51,18 @@ df = NaNs_to_Missing!(DataFrame(Array(0 ./ A_sparse + A_sparse)))
 # explicitly encoding missing
 obs = observations(df)
 glrm = GLRM(df, QuadLoss(), QuadReg(), QuadReg(), 2, obs=obs)
-fit!(glrm)
+fit!(glrm, verbose=false)
 
 # implicitly encoding missings from dataframe - this functionality has not been implemented for dataframes
 # glrm = GLRM(df, QuadLoss(), QuadReg(), QuadReg(), 2)
-# fit!(glrm)
+# fit!(glrm, verbose=false)
 
 # without specifying losses directly
 glrm = GLRM(DataFrame(A), 3, data_types)
-fit!(glrm)
+fit!(glrm, verbose=false)
 println("successfully fit dataframe")
 
-# imputation and sampling
+### imputation and sampling
 impute(glrm)
 println("successfully imputed entries")
 sample(glrm)
@@ -71,5 +74,5 @@ println("successfully sampled from model")
 m, n = 10, 10
 sparseA = sprandn(m, n, .5)
 glrm = GLRM(A, QuadLoss(), QuadReg(), QuadReg(), 5)
-fit!(glrm)
+fit!(glrm, verbose=false)
 println("successfully fit sparse GLRM")
diff --git a/todo.md b/todo.md
index 327ee08..75a8b68 100644
--- a/todo.md
+++ b/todo.md
@@ -26,3 +26,11 @@
 * sample doesn't work
 * lots of bugs in fit_dataframe_w_type_imputation; deprecated for now. (also it's an odd thing to do.)
 * imputation doesn't return correct type (for dataframes)
+
+# How to register/publish a new version of the package
+
+1. Update the version number in Project.toml.
+2. Navigate to the commit that you want tagged on GitHub.
+3. Comment `@JuliaRegistrator register` on that commit.
+4. Monitor the resulting PR on the General registry to see if any problems are reported.
+5. When the PR is accepted, use Tagger to make the GitHub release.