From 97e5d702458266c42241dbb9fe479a66e94562a6 Mon Sep 17 00:00:00 2001
From: gully
Date: Fri, 10 Mar 2017 13:31:26 -0800
Subject: [PATCH] First commit of the star_BB.py script to infer a second
 black body component.

Relates to Issue #75
---
 scripts/star_BB.py | 504 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 504 insertions(+)
 create mode 100755 scripts/star_BB.py

diff --git a/scripts/star_BB.py b/scripts/star_BB.py
new file mode 100755
index 00000000..9db23a19
--- /dev/null
+++ b/scripts/star_BB.py
@@ -0,0 +1,504 @@
+#!/usr/bin/env python
+
+# All of the argument parsing is done in the `parallel.py` module.
+
+import multiprocessing
+import time
+import numpy as np
+import Starfish
+from Starfish.model_BB import ThetaParam, PhiParam
+
+import argparse
+parser = argparse.ArgumentParser(prog="star_BB.py", description="Run the Starfish fitting model in single-order mode with many walkers, with an extra black body component.")
+parser.add_argument("--samples", type=int, default=5, help="How many samples to run?")
+parser.add_argument("--incremental_save", type=int, default=100, help="How often to save incremental progress of MCMC samples.")
+parser.add_argument("--resume", action="store_true", help="Continue from the last sample. If this is left off, the chain will start from your initial guess specified in config.yaml.")
+args = parser.parse_args()
+
+import os
+
+import Starfish.grid_tools
+from Starfish.spectrum import DataSpectrum, Mask, ChebyshevSpectrum
+from Starfish.emulator import Emulator
+from Starfish.emulator import F_bol_interp
+import Starfish.constants as C
+from Starfish.covariance import get_dense_C, make_k_func, make_k_func_region
+
+from scipy.special import j1
+from scipy.interpolate import InterpolatedUnivariateSpline
+from scipy.linalg import cho_factor, cho_solve
+from numpy.linalg import slogdet
+from astropy.stats import sigma_clip
+from astropy.analytic_functions import blackbody_lambda
+from astropy import units as u
+from astropy import constants as const
+
+
+import gc
+import logging
+
+from itertools import chain
+#from collections import deque
+from operator import itemgetter
+import yaml
+import shutil
+import json
+
+
+Starfish.routdir = ""
+
+# list of keys from 0 to (norders - 1)
+order_keys = np.arange(1)
+DataSpectra = [DataSpectrum.open(os.path.expandvars(file), orders=Starfish.data["orders"]) for file in Starfish.data["files"]]
+# list of keys from 0 to (nspectra - 1). Used for indexing purposes.
+spectra_keys = np.arange(len(DataSpectra))
+
+# Instruments are provided as one per dataset
+Instruments = [eval("Starfish.grid_tools." + inst)() for inst in Starfish.data["instruments"]]
+
+stef_boltz = const.sigma_sb.to(u.erg/u.s/u.cm**2/u.K**4).value  # Stefan-Boltzmann constant [cgs]
+
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", filename="{}log.log".format(
+    Starfish.routdir), level=logging.DEBUG, filemode="w", datefmt='%m/%d/%Y %I:%M:%S %p')
+
+class Order:
+    def __init__(self, debug=False):
+        '''
+        This object contains all of the variables necessary for the partial
+        lnprob calculation for one echelle order. It is designed to first be
+        instantiated within the main process and then forked to other
+        subprocesses. Once operating in the subprocess, the variables specific
+        to the order are loaded with an `INIT` message call, which tells
+        `self.initialize()` which key to initialize on.
+        '''
+        self.lnprob = -np.inf
+        self.lnprob_last = -np.inf
+
+        self.debug = debug
+
+    def initialize(self, key):
+        '''
+        Initialize to the correct chunk of data (echelle order).
+
+        :param key: (spectrum_id, order_key)
+        :type key: (int, int)
+
+        This method should only be called after all subprocesses have been forked.
+        '''
+
+        self.id = key
+        spectrum_id, self.order_key = self.id
+        # Make sure these are ints
+        self.spectrum_id = int(spectrum_id)
+
+        self.instrument = Instruments[self.spectrum_id]
+        self.dataSpectrum = DataSpectra[self.spectrum_id]
+        self.wl = self.dataSpectrum.wls[self.order_key]
+        self.fl = self.dataSpectrum.fls[self.order_key]
+        self.sigma = self.dataSpectrum.sigmas[self.order_key]
+        self.ndata = len(self.wl)
+        self.mask = self.dataSpectrum.masks[self.order_key]
+        self.order = int(self.dataSpectrum.orders[self.order_key])
+
+        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
+        if self.debug:
+            self.logger.setLevel(logging.DEBUG)
+        else:
+            self.logger.setLevel(logging.INFO)
+
+        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))
+
+        self.npoly = Starfish.config["cheb_degree"]
+        self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)
+
+        # If the file exists, optionally initialize to the Chebyshev values
+        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
+        if os.path.exists(fname):
+            self.logger.debug("Loading stored Chebyshev parameters.")
+            phi = PhiParam.load(fname)
+            self.chebyshevSpectrum.update(phi.cheb)
+
+        #self.resid_deque = deque(maxlen=500)  # Deque that stores the last residual spectra, for averaging
+        self.counter = 0
+
+        self.emulator = Emulator.open()
+        self.emulator.determine_chunk_log(self.wl)
+        self.F_bol_interp = F_bol_interp(Starfish.grid_tools.HDF5Interface())
+
+        self.pca = self.emulator.pca
+
+        self.wl_FFT = self.pca.wl
+
+        # The raw eigenspectra and mean flux components
+        self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis,:], self.pca.flux_std[np.newaxis,:], self.pca.eigenspectra))
+
+        self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
+        self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error
+
+        # Holders to store the convolved and resampled eigenspectra
+        self.eigenspectra = np.empty((self.pca.m, self.ndata))
+        self.flux_mean = np.empty((self.ndata,))
+        self.flux_std = np.empty((self.ndata,))
+
+        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
+        self.mus, self.C_GP, self.data_mat = None, None, None
+        self.Omega = None
+        self.Omega2 = None
+        self.BB_lam = np.empty((self.ndata,))
+        self.qq = None
+
+        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob
+
+        # self.nregions = 0
+        # self.exceptions = []
+
+        # Update the outdir based upon id
+        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
+
+
+    def lnprob_Theta(self, p):
+        '''
+        Update the model to the Theta parameters and then evaluate the lnprob.
+
+        Intended to be called from the master process via the command "LNPROB".
+        '''
+        try:
+            self.update_Theta(p)
+            lnp, raw_models = self.evaluate()  # Also sets self.lnprob to the new value
+            return lnp, raw_models
+        except C.ModelError:
+            self.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(p))
+            return -np.inf, []
+
+    def evaluate(self):
+        '''
+        Return the lnprob using the current version of the C_GP matrix, data matrix,
+        and other intermediate products.
+        '''
+
+        self.lnprob_last = self.lnprob
+
+        X = (self.chebyshevSpectrum.k * self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)
+
+        CC = self.Omega**2 * X.dot(self.C_GP.dot(X.T)) + self.data_mat
+
+        try:
+            factor, flag = cho_factor(CC)
+        except np.linalg.linalg.LinAlgError:
+            print("Spectrum:", self.spectrum_id, "Order:", self.order)
+            self.CC_debugger(CC)
+            raise
+
+        try:
+            # Stellar photosphere
+            model1 = self.Omega * (self.chebyshevSpectrum.k * self.flux_mean - X.dot(self.mus))
+            # Black body
+            model2 = self.Omega2 * self.qq * self.BB_lam.value * self.chebyshevSpectrum.k
+
+            # Return a "metadata blob" of both spectrum models
+            raw_models = [model1, model2]
+
+            net_model = model1 + model2
+            R = self.fl - net_model
+
+            logdet = np.sum(2 * np.log((np.diag(factor))))
+            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)
+
+            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
+            return self.lnprob, raw_models
+
+        # To give us some debugging information about what went wrong.
+        except np.linalg.linalg.LinAlgError:
+            print("Spectrum:", self.spectrum_id, "Order:", self.order)
+            raise
+
+
+    def CC_debugger(self, CC):
+        '''
+        Special debugging information for the covariance matrix decomposition.
+        '''
+        print('{:-^60}'.format('CC_debugger'))
+        print("See https://github.com/iancze/Starfish/issues/26")
+        print("Covariance matrix at a glance:")
+        if (CC.diagonal().min() < 0.0):
+            print("- Negative entries on the diagonal:")
+            print("\t- Check sigAmp: should be positive")
+            print("\t- Check uncertainty estimates: should all be positive")
+        elif np.any(np.isnan(CC.diagonal())):
+            print("- Covariance matrix has a NaN value on the diagonal")
+        else:
+            if not np.allclose(CC, CC.T):
+                print("- The covariance matrix is highly asymmetric")
+
+            # Still might have an asymmetric matrix below the `allclose` threshold
+            evals_CC, evecs_CC = np.linalg.eigh(CC)
+            n_neg = (evals_CC < 0).sum()
+            n_tot = len(evals_CC)
+            print("- There are {} negative eigenvalues out of {}.".format(n_neg, n_tot))
+            mark = lambda val: '>' if val < 0 else '.'
+
+            print("Covariance matrix eigenvalues:")
+            print(*["{: >6} {:{fill}>20.3e}".format(i, evals_CC[i],
+                fill=mark(evals_CC[i])) for i in range(10)], sep='\n')
+            print('{: >15}'.format('...'))
+            print(*["{: >6} {:{fill}>20.3e}".format(n_tot-10+i, evals_CC[-10+i],
+                fill=mark(evals_CC[-10+i])) for i in range(10)], sep='\n')
+        print('{:-^60}'.format('-'))
+
+
+    def update_Theta(self, p):
+        '''
+        Update the model to the current Theta parameters.
+
+        :param p: parameters to update model to
+        :type p: model.ThetaParam
+        '''
+
+        # Dirty HACK to get a fixed logg:
+        # simply pins the middle grid value (logg) to the configured "fix_logg" value,
+        # checking first to see whether it exists.
+        fix_logg = Starfish.config.get("fix_logg", None)
+        if fix_logg is not None:
+            p.grid[1] = fix_logg
+        #print("grid pars are", p.grid)
+
+        self.logger.debug("Updating Theta parameters to {}".format(p))
+
+        # Store the current accepted values before overwriting with new proposed values.
+        self.flux_mean_last = self.flux_mean.copy()
+        self.flux_std_last = self.flux_std.copy()
+        self.eigenspectra_last = self.eigenspectra.copy()
+        self.mus_last = self.mus
+        self.C_GP_last = self.C_GP
+
+        # Local, Doppler-shifted copy of the wavelengths
+        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))
+
+        # If vsini is less than 0.2 km/s, we might run into issues with
+        # the grid spacing. Therefore skip the convolution step if we have
+        # values smaller than this.
+        # FFT and convolve operations
+        if p.vsini < 0.0:
+            raise C.ModelError("vsini must be positive")
+        elif p.vsini < 0.2:
+            # Skip the vsini taper due to instrumental effects
+            eigenspectra_full = self.EIGENSPECTRA.copy()
+        else:
+            FF = np.fft.rfft(self.EIGENSPECTRA, axis=1)
+
+            # Determine the stellar broadening kernel
+            ub = 2. * np.pi * p.vsini * self.ss
+            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub ** 2) + 3. * np.sin(ub) / (2 * ub ** 3)
+            # set the zeroth frequency to 1 separately (DC term)
+            sb[0] = 1.
+
+            # apply the vsini taper
+            FF_tap = FF * sb
+
+            # do the inverse FFT
+            eigenspectra_full = np.fft.irfft(FF_tap, self.pca.npix, axis=1)
+
+        # Spectrum resample operations
+        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
+            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))
+
+        # Take the output from the FFT operation (eigenspectra_full), and stuff it
+        # into the respective data products
+        for lres, hres in zip(chain([self.flux_mean, self.flux_std], self.eigenspectra), eigenspectra_full):
+            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
+            lres[:] = interp(self.wl)
+            del interp
+
+        # Helps keep memory usage low; it seems like the numpy routine is slow
+        # to clear allocated memory for each iteration.
+        gc.collect()
+
+        F_bol1 = self.F_bol_interp.interp(p.grid)
+        F_bol2 = stef_boltz * p.T_BB**4.0  # Bolometric flux (sigma * T^4) from the black body
+        self.qq = F_bol2 / F_bol1[0]
+        # Adjust flux_mean and flux_std by Omega
+        #Omega = 10**p.logOmega
+        #self.flux_mean *= Omega
+        #self.flux_std *= Omega
+
+        self.BB_lam = blackbody_lambda(self.wl, p.T_BB)
+
+        self.Omega = 10**p.logOmega
+
+        # Now update the parameters from the emulator.
+        # If pars are outside the grid, Emulator will raise C.ModelError.
+        self.emulator.params = p.grid
+        self.mus, self.C_GP = self.emulator.matrix
+
+        self.mus2, self.C_GP2 = self.emulator.matrix  # duplicate draw; not used elsewhere in this script
+        self.Omega = 10**p.logOmega
+        self.Omega2 = 10**p.logOmega2
+
+
+class SampleThetaPhi(Order):
+
+    def initialize(self, key):
+        # Run through the standard initialization
+        super().initialize(key)
+
+        # for now, start with white noise
+        self.data_mat = self.sigma_mat.copy()
+        self.data_mat_last = self.data_mat.copy()
+
+        # Set up p0 and the independent sampler
+        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
+        phi = PhiParam.load(fname)
+
+        # Set the regions to None, since we don't want to include them even if they
+        # are there
+        phi.regions = None
+
+        # Loading the file that was previously output:
+        # convert the PhiParam object to an array
+        self.p0 = phi.toarray()
+
+        jump = Starfish.config["Phi_jump"]
+        cheb_len = (self.npoly - 1) if self.chebyshevSpectrum.fix_c0 else self.npoly
+        cov_arr = np.concatenate((Starfish.config["cheb_jump"]**2 * np.ones((cheb_len,)), np.array([jump["sigAmp"], jump["logAmp"], jump["l"]])**2 ))
+        cov = np.diag(cov_arr)
+
+        def lnfunc(p):
+            # Convert the p array into a PhiParam object
+            ind = self.npoly
+            if self.chebyshevSpectrum.fix_c0:
+                ind -= 1
+
+            cheb = p[0:ind]
+            sigAmp = p[ind]
+            ind += 1
+            logAmp = p[ind]
+            ind += 1
+            l = p[ind]
+
+            par = PhiParam(self.spectrum_id, self.order, self.chebyshevSpectrum.fix_c0, cheb, sigAmp, logAmp, l)
+
+            self.update_Phi(par)
+
+            # sigAmp must be positive (this is effectively a prior)
+            # See https://github.com/iancze/Starfish/issues/26
+            if not (0.0 < sigAmp):
+                self.lnprob_last = self.lnprob
+                lnp, raw_models = -np.inf, []
+                self.logger.debug("sigAmp was non-positive, returning -np.inf")
+                self.lnprob = lnp  # Same behavior as self.evaluate()
+            else:
+                lnp, raw_models = self.evaluate()
+                self.logger.debug("Evaluated Phi parameters: {} {}".format(par, lnp))
+
+            return lnp, raw_models
+
+
+    def update_Phi(self, p):
+        self.logger.debug("Updating nuisance parameters to {}".format(p))
+
+        # Read off the Chebyshev parameters and update
+        self.chebyshevSpectrum.update(p.cheb)
+
+        # Check to make sure the global covariance parameters make sense
+        #if p.sigAmp < 0.1:
+        #    raise C.ModelError("sigAmp shouldn't be lower than 0.1, something is wrong.")
+
+        max_r = 6.0 * p.l  # [km/s]
+
+        # Create a partial function which returns the proper element.
+        k_func = make_k_func(p)
+
+        # Store the previous data matrix in case we want to revert later
+        self.data_mat_last = self.data_mat
+        self.data_mat = get_dense_C(self.wl, k_func=k_func, max_r=max_r) + p.sigAmp*self.sigma_mat
+
+
+# Run the program.
+
+model = SampleThetaPhi(debug=True)
+
+model.initialize((0,0))
+
+def lnlike(p):
+    # Now we can proceed with the model
+    try:
+        pars1 = ThetaParam(grid=p[0:3], vz=p[3], vsini=p[4], logOmega=p[5], T_BB=p[6], logOmega2=p[7])
+        model.update_Theta(pars1)
+        # hard code npoly=3 (for fixc0 = True with npoly=4)
+        pars2 = PhiParam(0, 0, True, p[8:11], p[11], p[12], p[13])
+        model.update_Phi(pars2)
+        lnp, raw_models = model.evaluate()
+        return lnp, raw_models
+    except C.ModelError:
+        model.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(p))
+        return -np.inf, []
+
+
+def lnprior(p):
+    # sigAmp (p[11]) must be positive; otherwise the prior is flat.
+    if not (p[11] > 0):
+        return -1.0*np.inf
+    return 0.0
+
+
+# Try to load a user-defined prior
+try:
+    sourcepath_env = Starfish.config['Theta_priors']
+    sourcepath = os.path.expandvars(sourcepath_env)
+    with open(sourcepath, 'r') as f:
+        sourcecode = f.read()
+    code = compile(sourcecode, sourcepath, 'exec')
+    exec(code)
+    lnprior = user_defined_lnprior
+    print("Using the user defined prior in {}".format(sourcepath_env))
+except:
+    pass  # fall back to the built-in lnprior defined above
+
+
+# Insert the prior here
+def lnprob(p):
+    #print(p)
+    lp = lnprior(p)
+    if not np.isfinite(lp):
+        return -1.0*np.inf, []
+    lnlike_val, raw_models = lnlike(p)
+    return lp + lnlike_val, raw_models
+
+
+import emcee
+
+start = Starfish.config["Theta"]
+fname = Starfish.specfmt.format(model.spectrum_id, model.order) + "phi.json"
+phi0 = PhiParam.load(fname)
+
+ndim, nwalkers = 14, 40
+
+p0 = np.array(start["grid"] + [start["vz"], start["vsini"], start["logOmega"], start["T_BB"], start["logOmega2"]] +
+              phi0.cheb.tolist() + [phi0.sigAmp, phi0.logAmp, phi0.l])
+
+p0_std = [5, 0.02, 0.02, 0.5, 0.5, -0.01, 5, -0.01, -0.005, -0.005, -0.005, 0.01, 0.001, 0.5]
+
+if args.resume:
+    p0_ball = np.load("emcee_chain.npy")[:,-1,:]
+else:
+    p0_ball = emcee.utils.sample_ball(p0, p0_std, size=nwalkers)
+
+n_threads = multiprocessing.cpu_count()
+sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=n_threads)
+
+
+nsteps = args.samples
+ninc = args.incremental_save
+for i, (pos, lnp, state, raw_models) in enumerate(sampler.sample(p0_ball, iterations=nsteps)):
+    if (i+1) % ninc == 0:
+        time.ctime()
+        t_out = time.strftime('%Y %b %d,%l:%M %p')
+        print("{0}: {1:}/{2:} = {3:.1f}%".format(t_out, i, nsteps, 100 * float(i) / nsteps))
+        np.save('temp_emcee_chain.npy', sampler.chain)
+        np.save('temp_raw_models.npy', sampler.blobs)
+
+np.save('emcee_chain.npy', sampler.chain)
+np.save('raw_models.npy', sampler.blobs)
+
+print("The end.")
\ No newline at end of file
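
For reference, the second black-body component added by this patch is scaled in `update_Theta()` / `evaluate()` by the ratio of the black body's bolometric flux, sigma_SB * T_BB**4, to the photosphere's bolometric flux obtained from `F_bol_interp`, and then by its own solid-angle factor 10**logOmega2. The snippet below is a minimal, self-contained sketch of that arithmetic and is not part of the patch: the Planck function is written out in cgs units instead of calling `astropy.analytic_functions.blackbody_lambda`, and `F_bol_photosphere` is a hypothetical stand-in for the value returned by `F_bol_interp.interp(p.grid)`.

import numpy as np

# Physical constants in cgs units (assumed here, not taken from the patch).
H_CGS = 6.62607015e-27          # Planck constant [erg s]
C_CGS = 2.99792458e10           # speed of light [cm s^-1]
KB_CGS = 1.380649e-16           # Boltzmann constant [erg K^-1]
SIGMA_SB_CGS = 5.670374419e-5   # Stefan-Boltzmann constant [erg cm^-2 s^-1 K^-4]

def planck_lambda(wl_angstrom, T):
    """Planck specific intensity B_lambda(T) in erg s^-1 cm^-2 A^-1 sr^-1."""
    wl_cm = np.asarray(wl_angstrom, dtype=float) * 1.0e-8
    x = H_CGS * C_CGS / (wl_cm * KB_CGS * T)
    # 2 h c^2 / lambda^5 / (exp(x) - 1); the trailing 1e-8 converts per-cm to per-Angstrom.
    return 2.0 * H_CGS * C_CGS**2 / wl_cm**5 / np.expm1(x) * 1.0e-8

def second_component(wl_angstrom, T_BB, logOmega2, F_bol_photosphere, cheb=1.0):
    """Analogue of model2 in evaluate(): Omega2 * qq * B_lambda(T_BB) * cheb."""
    qq = SIGMA_SB_CGS * T_BB**4 / F_bol_photosphere  # bolometric flux ratio, as in update_Theta()
    return 10.0**logOmega2 * qq * planck_lambda(wl_angstrom, T_BB) * cheb

# Example: a 1500 K component on a toy K-band wavelength grid (Angstroms).
wl = np.linspace(20000.0, 24000.0, 5)
print(second_component(wl, T_BB=1500.0, logOmega2=-3.0, F_bol_photosphere=1.0e10))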