-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaxEntDistribution.m
executable file
·141 lines (125 loc) · 5.6 KB
/
maxEntDistribution.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
function [ lambda, entropy ] = maxEntDistribution( dim, num_moms, acc, em, ul, ll, l1hp, l12numhp, l1ap, l12numap, c_scale )
%
% dim -- distribution dimensionality
% num_moms -- nonnegative scalar integer
% acc -- nonnegative scalar integer for sparse gridding accuracy
% em -- l-by-1 vector of empirically computed moments for all
% combinations with l1 norm less than num_moms
% ul -- 1-by-dim (the upper limit of integration)
% ll -- 1-by-dim (the lower limit of integration)
% l1hp -- l-by-dim indexing array telling us where to look in the
% moments matrix while building the hessian
% l12numhp -- ( num_moms + 1 )^dim-by-1 indexing array telling us
% where to look in the l1powers array while building the
% hessian
% l1ap -- k-by-dim indexing array telling us where to look in the
% moments matrix while building the hessian
% l12numap -- ( 2 * num_moms + 1 )^dim )-by-1 indexing array telling us
% where to look in the l1powers array while building the
% hessian
% c_scale -- 1-by-dim describing the scaling in each direction (useful
% for plotting!)
%
% Computes the lambda_j for the maximum entropy distribution exp(\sum_{\abs{j} \leq n}
% \lambda_j x^j) such that its moments are equal to the emp_moments
%% parameters
% addpath( 'nwSpGr' );
numl1hp= size( l1hp, 1 );
numl1ap = size( l1ap, 1 );
numSteps = 100;
criterion = 1e-6;
mod = 1e-10;
plotting = 0; % only works for 3D data
%% preallocating nodes and powers
[ nodes, weights ] = nwspgr( 'KPU', dim, acc );
% need to apply change of variables on nodes
nodes = ( repmat( ul, size( nodes, 1 ), 1 ) - repmat( ll, size( nodes, 1 ), 1 ) ) .* nodes + repmat( ll, size( nodes, 1 ), 1 );
x = msspoly( 'x', dim );
[ ~, xn ] = isfree( x );
xhp = msspoly( [ numl1hp 1 ], [ (1:numl1hp)' ones(numl1hp,1) ], repmat(xn',numl1hp,1), l1hp, ones(numl1hp,1) );
xap = msspoly( [ numl1ap 1 ], [ (1:numl1ap)' ones(numl1ap,1) ], repmat(xn',numl1ap,1), l1ap, ones(numl1ap,1) );
%% newton's method for estimating lambda
% pre_lambda = randn( size( xhp, 1 ), 1 );
pre_lambda = zeros( size( xhp, 1 ), 1 );
moments = computeMoments( x, nodes', ul, ll, weights, xap, xhp, pre_lambda' );
[ H, am ] = moments2Hessian( num_moms, moments, l1hp, l12numap );
post_lambda = pre_lambda + pinv( H ) * ( em - am );
% sc = norm( post_lambda - pre_lambda, 1 )
sc = Inf;
counter = 1;
% foo = ones( size( em ) );
% foo( end ) = 1e5;
% em = foo .* em;
while ( sc > criterion && counter < numSteps );
pre_lambda = post_lambda;
moments = computeMoments( x, nodes', ul, ll, weights, xap, xhp, pre_lambda' );
[ H, am ] = moments2Hessian( num_moms, moments, l1hp, l12numap );
sc = norm( ( em - am )./em , Inf )
if( any( isnan( H( : ) ) ) )
break;
end
[ U, S, V ] = svd( H );
if( cond( S ) > 1e5 || cond( S ) < 1e-5 )
if( isinf( cond( S ) ) )
break;
end
for i = 20:-10:-20
M = cond( S ) * 0.8^i * eye( size( S, 1 ) );
m2 = computeMoments( x, nodes', ul, ll, weights, xap, xhp, ( pre_lambda + cond( S ) * pinv( U * ( S + M ) * V' ) * ( em - am ) )' );
[ ~, am2 ] = moments2Hessian( num_moms, m2, l1hp, l12numap );
if( norm( em - am2, 2 ) < sc )
% i
break;
end
end
post_lambda = pre_lambda + cond( S ) * 0.9^i * pinv( U * ( S + M ) * V' ) * ( em - am );
else
post_lambda = pre_lambda + pinv( U * S * V' ) * ( em - am );
end
% sprintf( 'min diag = %f and cond = %f', min( abs( diag( S ) ) ), cond( S ) )
% h_mod = min( mod, 1e-3 * sc ); % as we get closer to the optimum we want the scaling to be less aggressive so that convergence happens quicker
% moddiag = h_mod * eye( size( S, 1 ) );
% moddiag( S > h_mod ) = 0;
% moddiag( : ) = 0;
% post_lambda = pre_lambda + pinv( U * ( S + moddiag ) * V' ) * ( em - am );
counter = counter + 1;
end
if( any( isnan( H( : ) ) ) || isinf( cond( S ) ) )
lambda = nan;
entropy = nan;
else
lambda = post_lambda;
entropy = -sum( lambda .* am ) + log( abs( prod( c_scale ) ) );
end
% if( dim == 1 )
% variance = am( 3 ) - 2 * am( 2 ) * am( 2 ) + am( 2 )^2 * am( 1 );
% end
%% plotting functionality (only works for 3D data at the moment)
% f = @(x) exp( lambda' * prod( repmat( x, size( l1hp, 1 ), 1 ).^l1hp, 2 ) );
% samplesfromDist = slicesample( ( ll + ul )/2 , 5000, 'pdf', f, 'thin', 5, 'burnin', 10000, 'width', 0.025 );
%
%
% figure( 1 );
% scatter3( samplesfromDist( :, 1 ) * c_scale( 1 ), samplesfromDist( :, 2 ) * c_scale( 2 ), ...
% samplesfromDist( :, 3 ) * c_scale( 3 ) );
% % this plotting will work if the data is in 2D
% figure( 1 );
% cloudPlot( samplesfromDist( :, 1 ) * c_scale( 1 ), samplesfromDist( :, 2 ) * c_scale( 2 ) )
%
% % this ploting will work if the data is in 2D
% [ X, Y ] = meshgrid( ll(1):.01:ul(1), ll(2):0.01:ul(2) );
% exp_helper = exp( dmsubs( lambda' * xhp, x, [ X( : ) Y( : ) ]' ) );
% foo = reshape( exp_helper, size( X, 1 ), size( X, 2 ) );
% figure( 2 );
% mesh( X * c_scale( 1 ), Y * c_scale( 2 ), foo )
%
% if( plotting )
% % [ X, Y, Z ] = sphere( 20 );
% % exp_helper = exp( dmsubs( lambda' * xhp, x, [ X( : ) Y( : ) Z( : ) ]' ) );
% % [ az, el, ~ ] = cart2sph( X( : ), Y( : ), Z( : ) );
% % figure;
% % PlotSphereIntensity( az * 180/pi, el * 180/pi, exp_helper' );
% space = linspace( ll, ul, 1001 );
% exp_helper = exp( dmsubs( lambda' * xhp, x, space ) );
% figure; plot( space, exp_helper );
% end