-
Notifications
You must be signed in to change notification settings - Fork 0
/
poisson_nmf.h
244 lines (220 loc) · 7.56 KB
/
poisson_nmf.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
// Copyright 2018 Ghislain Durif
//
// This file is part of the `pCMF' library for R and related languages.
// It is made available under the terms of the GNU General Public
// License, version 2, or at your option, any later version,
// incorporated herein by reference.
//
// This program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public
// License along with this program; if not, write to the Free
// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
// MA 02111-1307, USA
/*!
* \brief definitions of the 'poisson_nmf' class (Non-negative Matrix Factorization in the Poisson setting)
* \author Ghislain Durif
* \version 1.0
* \date 21/02/2018
*/
#ifndef POISSON_NMF_H
#define POISSON_NMF_H
#include <RcppEigen.h>
#include "model.h"
#include "utils/random.h"
// [[Rcpp::depends(RcppEigen)]]
using Eigen::MatrixXd; // variable size matrix, double precision
namespace pCMF {
/*!
* \brief class definition for a non-negative matrix factorization model in the Poisson setting
*
* Defines a Non-negative Matrix Factorization for count matrices,
* based on a Poisson model over the count, i.e.
* f\[
* X \sim P(UV^t)
* f\]
*
* Optimization process follows what was proposed by [1]
* and implemented by [2]: minimization of the Bregman divergence in the
* Poisson setting between f\$ X f\$ and \f$ UV^t f\$, which happens to be
* the Kullback-Leibler divergence between f\$ X f\$ and \f$ UV^t f\$,
* which is equivalent to minimizing the deviance of the model previously defined.
*
* [1] Brunet, J.-P., Tamayo, P., Golub, T.R., Mesirov, J.P., 2004.
* Metagenes and molecular pattern discovery using matrix factorization.
* PNAS 101, 4164–4169.
*
* [2] Gaujoux, R., Seoighe, C., 2010.
* A flexible R package for nonnegative matrix factorization.
* BMC Bioinformatics 11, 367.
*
*/
class poisson_nmf : public matrix_factor_model {
protected:
MatrixXd m_oldU; /*!< previous values for latent components, dim n x K (representation of individuals) */
MatrixXd m_oldV; /*!< previous values for latent loadings, dim p x K (contribution of recorded variables) */
public:
/*!
* \brief constructor for the class `poisson_nmf`
*/
poisson_nmf(int n, int p, int K, const MatrixXd &X);
/*!
* \brief randomly perturb parameters
*
* \param[in,out] rng random number generator
* \param[in] noise_level level of the perturbation, based on a uniform
* distribution on [-noise_level, noise_level]
*/
virtual void perturb_param(myRandom::RNGType &rng, double noise_level);
/*!
* \brief update rules for parameters in the optimization process
*
* Update U and V as proposed in [1]
*
* f\[
* U_{i,k} \gets U_{i,k} \times \frac{\sum_j V_{j,k} X_{i,j} / (UV^t)_{i,j}}{\sum_j V_{j,k}}
* f\]
*
* f\[
* V_{jk} \gets V_{j,k} \times \frac{\sum_i U_{i,k} X_{i,j} / (UV^t)_{i,j}}{\sum_i U_{i,k}}
* f\]
*
* [1] Brunet, J.-P., Tamayo, P., Golub, T.R., Mesirov, J.P., 2004.
* Metagenes and molecular pattern discovery using matrix factorization.
* PNAS 101, 4164–4169.
*
*/
virtual void update_param();
/*!
* \brief update parameters values between iterations
*
* current values of parameters become old values of parameters
*/
virtual void prepare_next_iterate();
/*!
* \brief compute absolute and normalized gap of parameters between two iterates
*
* \param[out] abs_gap absolute gap.
* \param[out] norm_gap normalized gap.
*/
virtual void gap_between_iterates(double &abs_gap, double& norm_gap);
/*!
* \brief compute a convergence criterion to assess convergence based on
* the RV coefficients
*
* The RV coefficients measures the closeness of the two set of points stored
* in two matrices [1]. Here, we compute:
* f\[
* crit = min( RV_coeff(U_{new}, U_{old}), RV_coeff(V_{new}, V_{old}) )
* f\]
*
* Important: The RV coefficient custom is transformed so that
* it converges to zero when convergence occurs. Thus, this function
* returns f\$ 1 - crit f\$.
*
* \return value of the criterion
*
* [1] Friguet, C., 2010. Impact de la dépendance dans les procédures de tests
* multiples en grande dimension. Rennes, AGROCAMPUS-OUEST.
*/
virtual double custom_conv_criterion();
/*!
* \brief compute the optimization criterion associated to the Poisson NMF model
*
* The loss is the Bregman divergence between \f$ X \f$ and \f$ UV^t \f$
* in the Poisson framework:
*
* \f[
* d(X,Y) = \sum_{i,j} d(x_{i,j}, y_{i,j})
* \f]
*
* with \f$ d(x,y) = x \log\frac{x}{y} - x + y \f$
*
* This is equivalent to optimizing the deviance of the Poisson model, i.e.
*
* \f[
* deviance = -2 \times [ \log p(X | \Lambda = UV^t) - \log p(X | \Lambda = X)]
* \f]
*
* where \f$ \log p(X | \Lambda ) \f$ is the Poisson log-likelihood and
* \f$ \Lambda \f$ the Poisson rate matrix, i.e
*
* \f[
* \log p(X | \Lambda ) = \log p(x_{ij} | \lambda_{ij})
* \f]
*/
virtual double optim_criterion();
/*!
* \brief compute the Poisson log-likelihood associated to the Poisson NMF model
*
* \f[
* \log p(X | \Lambda = UV^t)
* \f]
*
* where \f$ \log p(X | \Lambda ) \f$ is the Poisson log-likelihood and
* \f$ \Lambda \f$ the Poisson rate matrix, i.e
*
* \f[
* \log p(X | \Lambda ) = \log p(x_{ij} | \lambda_{ij})
* \f]
*/
virtual double loglikelihood();
/*!
* \brief compute the deviance associated to the Poisson NMF model
*
* \f[
* deviance = -2 \times [ \log p(X | \Lambda = UV^t) - \log p(X | \Lambda = X)]
* \f]
*
* where \f$ \log p(X | \Lambda ) \f$ is the Poisson log-likelihood and
* \f$ \Lambda \f$ the Poisson rate matrix, i.e
*
* \f[
* \log p(X | \Lambda ) = \log p(x_{ij} | \lambda_{ij})
* \f]
*
* This is equivalent to computing the Bregman divergence
* between \f$ X \f$ and \f$ UV^t \f$ in the Poisson framework:
*
* \f[
* d(X,Y) = \sum_{i,j} d(x_{i,j}, y_{i,j})
* \f]
*
* with \f$ d(x,y) = x \log\frac{x}{y} - x + y \f$
*/
virtual double deviance();
/*!
* \brief compute the percentage of explained deviance associated
* to the Poisson NMF model
*
* \f[
* %deviance = \frac{ \log p(X | \Lambda = UV^t) - \log p(X | \Lambda = \bar{X})}{ \log p(X | \Lambda = X) - \log p(X | \Lambda = \bar{X}) }
* \f]
*
* where \f$ \log p(X | \Lambda ) \f$ is the Poisson log-likelihood and
* \f$ \Lambda \f$ the Poisson rate matrix, i.e
*
* \f[
* \log p(X | \Lambda ) = \log p(x_{ij} | \lambda_{ij})
* \f]
*
* and \f$ \bar{X} \f$ the column-wise empirical mean of \f$ X \f$
*
*/
virtual double exp_deviance();
protected:
/*!
* \brief compute partial deviance using a sub-set of factors (among 1...K)
*
* \param[in] factor integer vector of size 'K' giving the sub-set of
* 'k' factors to consider to compute the deviance in first 'k' positions.
* \param[in] k integer, sub-dimension to consider (<='K').
*/
virtual double partial_deviance(const vector<int> &factor, int k);
};
}
#endif // POISSON_NMF_H