minor tweaks to shiftingGaussian (or perhaps some error fixes) but wo…

…rking towards vectorized-evaluation of likelihood in TOF space in mcModelIntegration demo
gcrich · Dec 13, 2016 · 282d137 · 282d137
1 parent b49dd90
commit 282d137
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 16 deletions.
diff --git a/tests/mcModelIntegration.py b/tests/mcModelIntegration.py
@@ -7,6 +7,7 @@
 # needed to numerically evaluate likelihoods / probabilities
 
 import numpy as np
+from numpy import inf
 from scipy.integrate import quad
 import scipy.optimize as optimize
 import matplotlib.pyplot as plot
@@ -70,6 +71,26 @@ def evaluateModel( params, coords):
     return returnVal / (mp_sigma*np.sqrt(2*np.pi))
 
 
+def generateModelData(params, nSamples):
+    """Generate some fake data from our mode with num samples = nSamples
+    params is an array of the parameters, [e0, e1, sigma]
+    Returns a tuple of len(nSamples), [x, ed, en, tof]
+    """
+    data_x=np.random.uniform(low=0.0, high=distance_cellLength, size=nSamples)
+    data_ed= np.random.normal(loc=params[0] + params[1]*data_x, 
+                              scale=params[2])
+    data_en = getDDneutronEnergy(data_ed)
+
+    neutronDistance = distance_cellToZero + (distance_cellLength - data_x)
+    neutronTOF = getTOF(mass_neutron, data_en, neutronDistance)
+    effectiveDenergy = (params[0] + data_ed)/2
+    deuteronTOF = getTOF( mass_deuteron, effectiveDenergy, data_x )
+    data_tof = neutronTOF + deuteronTOF
+
+    data = np.column_stack((data_x,data_ed,data_en,data_tof))
+    return data
+
+
 # mp_* are model parameters
 # *_t are 'true' values that go into our fake data
 mp_initialEnergy_t = 1100 # initial deuteron energy, in keV
@@ -78,7 +99,7 @@ def evaluateModel( params, coords):
 
 
 # generate fake data
-nSamples = 10000
+nSamples = 1000000
 data_model_x = np.random.uniform(low=0.0, high=distance_cellLength, 
                                  size=nSamples)
 data_model_ed = np.random.normal(loc=mp_initialEnergy_t + 
@@ -97,7 +118,7 @@ def evaluateModel( params, coords):
 
 # plot the fake data...
 plot.figure(1)
-plot.scatter(data_model_x, data_model_en, color='k', alpha=0.3)
+plot.scatter(data_model_x[:1000], data_model_en[:1000], color='k', alpha=0.3)
 plot.xlabel('Cell location (cm)')
 plot.ylabel('Neutron energy (keV)')
 plot.show()
@@ -111,7 +132,7 @@ def evaluateModel( params, coords):
 
 # plot the TOF vs x location
 plot.figure(3)
-plot.scatter(data_model_en,modelTOFdata, color='k', alpha=0.3)
+plot.scatter(data_model_en[:2000],modelTOFdata[:2000], color='k', alpha=0.3)
 plot.xlabel('Neutron energy (keV)' )
 plot.ylabel('TOF (ns)')
 plot.show()
@@ -124,9 +145,46 @@ def evaluateModel( params, coords):
 print('number of events found {}'.format(len(tofSatisfiedData)))
 
 plot.figure(10)
-plot.scatter(tofSatisfiedData[:,0],tofSatisfiedData[:,1],color='red',
+plot.scatter(tofSatisfiedData[:500,0],tofSatisfiedData[:500,1],color='red',
              alpha=0.5, zorder=10)
-plot.scatter(data_model_x, data_model_ed, color='k', alpha=0.2,zorder=1)
+plot.scatter(data_model_x[:5000], data_model_ed[:5000], color='k', alpha=0.2,zorder=1)
 plot.ylabel('Deuteron energy (keV)')
 plot.xlabel('Location in cell (cm)')
-plot.show()
+plot.show()
+
+histTOFdata, tof_bin_edges = np.histogram(fakeDataSet[:,3], 25, (175.0,200.0),
+                                          density=True)
+plot.figure(20)
+plot.scatter(tof_bin_edges[:-1], histTOFdata, color='k')
+plot.xlabel('TOF (ns)')
+plot.ylabel('Counts')
+plot.show()
+
+plot.figure(21)
+plot.scatter(tof_bin_edges[:-1],np.log(histTOFdata), color='k')
+plot.xlabel('TOF (ns)')
+plot.ylabel('log PDF')
+plot.show()
+
+
+# make some small "observed" fake data
+nObsTestEvents = 50
+fakeObsData = generateModelData([mp_initialEnergy_t,mp_loss0_t,mp_sigma_t],
+                                nObsTestEvents)
+fakeObsBadData = generateModelData([mp_initialEnergy_t,mp_loss0_t,
+                                    mp_sigma_t*0.6],nObsTestEvents)
+
+# make our vector of 'n'
+observedVectorN, observed_bin_edges = np.histogram(fakeObsData[:,3], 25,
+                                                   (175.0,200.0))
+observedVectorNbad, observed_bin_edges_bad = np.histogram(fakeObsBadData[:,3],
+                                                          25,
+                                                   (175.0,200.0))
+
+loghist = np.log(histTOFdata)
+loghist[loghist==-inf] = 0
+
+testLogLike = np.dot(observedVectorN, loghist)
+testLogLikeBad = np.dot(observedVectorNbad, loghist)
+print('test loglikelihood value {}, and for off-observables {}'.format(
+      testLogLike, testLogLikeBad))
diff --git a/tests/shiftingGaussian_brute.py b/tests/shiftingGaussian_brute.py
@@ -95,7 +95,7 @@ def lnlikeFromProjProb(modelParams, observable):
 
     return loglikelihood
 
-def numLnlikeFromProjProb(modelParams, observable):
+def numLnlikeFromProjProb(modelParams, observables):
     """Get the log likelihood from the numerically projected PDF
     argument modelParams is an array of the model parameters, [sigma, m, b]
     argument observables is an array of data to fit against
@@ -104,7 +104,7 @@ def numLnlikeFromProjProb(modelParams, observable):
     #xMin=-10
     sigma, m, b = modelParams
     #print('sigma {}, m {}, b {}'.format(sigma,m,b))
-    y = observable # for convenience inside the function
+    y = observables # for convenience inside the function
     projectedProbabilities = getNumProjectedProb( sigma, m, b, y)
     logProjProbs = np.log(projectedProbabilities)
     loglikelihood = np.sum(logProjProbs)
@@ -293,15 +293,15 @@ def lnprobNumeric(modelParams, observables):
 #    return lp + lnlike(theta, x, y, yerr)
 #    
 #######################################
-ndim, nwalkers = 3, 250
+ndim, nwalkers = 3, 100
 
 sigma, m, b = [0.4, -0.3, 5]
 pos = [[sigma, m, b] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
 print(type(yVals))
 print(len(yVals))
 sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprobNumeric, 
-                                kwargs={'observables': yVals}, threads = 8)
-sampler.run_mcmc(pos, 1000)
+                                kwargs={'observables': yVals}, threads = 10)
+sampler.run_mcmc(pos, 500)
 
 
 plot.figure(10)
@@ -333,7 +333,7 @@ def lnprobNumeric(modelParams, observables):
 plot.ylabel('counts')
 plot.show()
 #
-samples = sampler.chain[:, -200:, :].reshape((-1, ndim))
+samples = sampler.chain[:,200:, :].reshape((-1, ndim))
 #
 #
 #
@@ -350,14 +350,15 @@ def lnprobNumeric(modelParams, observables):
 nPTtemps = 20
 nPTwalkers = 100
 ptSampler = emcee.PTSampler( nPTtemps, nPTwalkers, ndim, 
-                      numLnlikeFromProjProb, lnPriors)
+                      numLnlikeFromProjProb, lnPriors, threads=10,
+                      loglkwargs={'observables': yVals})
 #
-p0 = optimizeResult["x"] + 1e-3*np.random.randn(nPTtemps,nPTwalkers,ndim)
-for p, lnl, lnp in ptSampler.sample(p0,iterations=1000, threads=10):
+p0 = [sigma, m, b] + 1e-3*np.random.randn(nPTtemps,nPTwalkers,ndim)
+for p, lnl, lnp in ptSampler.sample(p0,iterations=1000):
     pass
 ptSampler.reset()
 for p, lnl, lnp in ptSampler.sample( p, lnprob0=lnp, lnlike0=lnl, 
-                                    iterations=10000,thin=10, threads=10):
+                                    iterations=10000,thin=10):
     pass