Skip to content

Commit

Permalink
continuous logging
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoNeureiter committed Feb 23, 2022
1 parent 579a0c5 commit b704959
Show file tree
Hide file tree
Showing 8 changed files with 293 additions and 222 deletions.
2 changes: 1 addition & 1 deletion experiments/south_america/scripts/run_sa_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

# Sample
mc.warm_up()
mc.sample()
mc.sample(run=run)

# Save samples to file
mc.log_statistics()
Expand Down
4 changes: 2 additions & 2 deletions sbayes/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from sbayes.cli import main
from sbayes.cli import cli

main()
cli()
111 changes: 76 additions & 35 deletions sbayes/cli.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,49 @@
import multiprocessing
from itertools import product

import argparse
from pathlib import Path
import tkinter as tk
from tkinter import filedialog

from sbayes.experiment_setup import Experiment
from sbayes.experiment_setup import Experiment, update_recursive
from sbayes.load_data import Data
from sbayes.mcmc_setup import MCMC
from sbayes.simulation import Simulation
from sbayes.mcmc_setup import MCMC


def run_experiment(
experiment: Experiment,
data: Data,
i_run: int,
custom_settings: dict = None,
):
if custom_settings is not None:
update_recursive(experiment.config, custom_settings)

def run_experiment(experiment, data, run):
mcmc = MCMC(data=data, experiment=experiment)
mcmc.log_setup()

# Warm-up
mcmc.warm_up()

# Sample from posterior
mcmc.sample()
mcmc.sample(run=i_run)

# Save samples to file
mcmc.log_statistics()
mcmc.save_samples(run=run)
mcmc.save_samples(run=i_run)

# Use the last sample as the new initial sample
return mcmc.samples['last_sample']


def main(config: Path = None,
experiment_name: str = None,
custom_settings: dict = None):
if config is None:
parser = argparse.ArgumentParser(
description="An MCMC algorithm to identify contact zones")
parser.add_argument("config", nargs="?", type=Path,
help="The JSON configuration file")
parser.add_argument("name", nargs="?", type=str,
help="The experiment name used for logging and as the name of the results directory.")
args = parser.parse_args()
config = args.config
experiment_name = args.name

# 0. Ask for config file via files-dialog, if not provided as argument.
if config is None:
tk.Tk().withdraw()
config = filedialog.askopenfilename(
title='Select a config file in JSON format.',
initialdir='..',
filetypes=(('json files', '*.json'),('all files', '*.*'))
)

def main(
config: Path,
experiment_name: str = None,
custom_settings: dict = None,
processes: int = 1,
):
# Initialize the experiment
experiment = Experiment(experiment_name=experiment_name,
config_file=config,
Expand All @@ -76,14 +70,30 @@ def main(config: Path = None,
data.log_loading()

# Rerun experiment to check for consistency
for run in range(experiment.config['mcmc']['runs']):
n_areas = experiment.config['model']['areas']
iterate_or_run(
x=n_areas,
config_setter=lambda x: experiment.config['model'].__setitem__('areas', x),
function=lambda x: run_experiment(experiment, data, run)
i_run_range = list(range(experiment.config['mcmc']['runs']))
n_areas_range = experiment.config['model']['areas']
if type(n_areas_range) not in [tuple, list, set]:
assert isinstance(n_areas_range, int)
n_areas_range = [n_areas_range]

def runner(i_run, n_areas):
run_experiment(
experiment=experiment,
data=data,
i_run=i_run,
custom_settings={'model': {'areas': n_areas}}
)

for i in i_run_range:
for k in n_areas_range:
runner(i, k)

# if processes <= 1:
# map(runner, product(i_run_range, n_areas_range))
# else:
# pool = multiprocessing.Pool(processes=processes)
# pool.map(runner, product(i_run_range, n_areas_range))


def iterate_over_parameter(values, config_setter, function, print_message=None):
"""Iterate over each value in ´values´, apply ´config_setter´ (to update the config
Expand All @@ -104,5 +114,36 @@ def iterate_or_run(x, config_setter, function, print_message=None):
function(x)


def cli():
"""Command line interface."""
parser = argparse.ArgumentParser(
description='An MCMC algorithm to identify contact zones')
parser.add_argument('config', type=Path,
help='The JSON configuration file')
parser.add_argument('name', nargs='?', type=str,
help='The experiment name used for logging and as the name of the results directory.')
parser.add_argument('threads', nargs='?', type=int, default=1,
help='The number of parallel processes.')

args = parser.parse_args()

config = args.config

# Ask for config file via files-dialog, if not provided as argument.
if config is None:
tk.Tk().withdraw()
config = filedialog.askopenfilename(
title='Select a config file in JSON format.',
initialdir='..',
filetypes=(('json files', '*.json'), ('all files', '*.*'))
)

main(
config=config,
experiment_name=args.name,
processes=args.threads
)


if __name__ == '__main__':
main()
cli()
126 changes: 36 additions & 90 deletions sbayes/mcmc_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@

""" Setup of the MCMC process """

import numpy as np
import typing
import typing as tp

from sbayes.postprocessing import contribution_per_area, match_areas, rank_areas
from sbayes.sampling.zone_sampling import Sample, ZoneMCMC, ZoneMCMCWarmup
from sbayes.util import normalize, samples2file
from sbayes.model import Model
from sbayes.simulation import Simulation
from sbayes.sampling.loggers import ResultsLogger, ParametersCSVLogger, AreasLogger, LikelihoodLogger


class MCMC:
Expand Down Expand Up @@ -52,15 +49,23 @@ def log_setup(self):
Ratio of contact steps (changing gamma): {mcmc_config['operators']['contact']}
''')

def sample(self, lh_per_area=True, initial_sample: typing.Optional[typing.Any] = None):
def sample(
self,
lh_per_area=True,
initial_sample: tp.Optional[Sample] = None,
run: int = 1
):
mcmc_config = self.config['mcmc']

if initial_sample is None:
initial_sample = self.sample_from_warm_up

sample_loggers = self.get_sample_loggers(run=run)

self.sampler = ZoneMCMC(
data=self.data,
model=self.model,
sample_loggers=sample_loggers,
initial_sample=initial_sample,
operators=mcmc_config['operators'],
p_grow_connected=mcmc_config['grow_to_adjacent'],
Expand All @@ -72,73 +77,19 @@ def sample(self, lh_per_area=True, initial_sample: typing.Optional[typing.Any] =
self.sampler.generate_samples(self.config['mcmc']['steps'],
self.config['mcmc']['samples'])

# Evaluate likelihood and prior for each zone alone (makes it possible to rank zones)
if lh_per_area:
contribution_per_area(self.sampler)
# # Evaluate likelihood and prior for each zone alone (makes it possible to rank zones)
# if lh_per_area:
# contribution_per_area(self.sampler)

self.samples = self.sampler.statistics

def eval_ground_truth(self, lh_per_area=True):
assert isinstance(self.data, Simulation)
simulation = self.data

# Evaluate the likelihood of the true sample in simulated data
# If the model includes inheritance use all weights, if not use only the first two weights (global, zone)
if self.config['model']['inheritance']:
weights = simulation.weights.copy()
else:
weights = normalize(simulation.weights[:, :2])

ground_truth = Sample(zones=simulation.areas,
weights=weights,
p_global=simulation.p_universal[np.newaxis, ...],
p_zones=simulation.p_contact,
p_families=simulation.p_inheritance)

ground_truth.everything_changed()

self.samples['true_zones'] = simulation.areas
self.samples['true_weights'] = weights
self.samples['true_p_global'] = simulation.p_universal[np.newaxis, ...]
self.samples['true_p_zones'] = simulation.p_contact
self.samples['true_p_families'] = simulation.p_inheritance
self.samples['true_ll'] = self.sampler.likelihood(ground_truth, 0)
self.samples['true_prior'] = self.sampler.prior(ground_truth, 0)
self.samples["true_families"] = simulation.families

if lh_per_area:
ground_truth_log_lh_single_area = []
ground_truth_prior_single_area = []
ground_truth_posterior_single_area = []

for z in range(len(simulation.areas)):
area = simulation.areas[np.newaxis, z]
p_zone = simulation.p_contact[np.newaxis, z]

# Define Model
ground_truth_single_zone = Sample(zones=area, weights=weights,
p_global=simulation.p_universal[np.newaxis, ...],
p_zones=p_zone, p_families=simulation.p_inheritance)
ground_truth_single_zone.everything_changed()

# Evaluate Likelihood and Prior
lh = self.sampler.likelihood(ground_truth_single_zone, 0)
prior = self.sampler.prior(ground_truth_single_zone, 0)

ground_truth_log_lh_single_area.append(lh)
ground_truth_prior_single_area.append(prior)
ground_truth_posterior_single_area.append(lh + prior)

self.samples['true_lh_single_zones'] = ground_truth_log_lh_single_area
self.samples['true_prior_single_zones'] = ground_truth_prior_single_area
self.samples['true_posterior_single_zones'] = ground_truth_posterior_single_area
self.sampler.print_statistics()

def warm_up(self):
mcmc_config = self.config['mcmc']
print(mcmc_config['warmup'])
warmup = ZoneMCMCWarmup(
data=self.data,
model=self.model,
sample_loggers=[],
n_chains=mcmc_config['warmup']['warmup_chains'],
operators=mcmc_config['operators'],
p_grow_connected=mcmc_config['grow_to_adjacent'],
Expand All @@ -153,35 +104,30 @@ def warm_up(self):
warm_up=True,
warm_up_steps=self.config['mcmc']['warmup']['warmup_steps'])

def save_samples(self, run=1):

# TODO move all of this:
# 1. log samples during MCMC run
# 2. match + rang areas in summarization
# 3. compute simulation stats in separate script

# self.samples = match_areas(self.samples)
# self.samples = rank_areas(self.samples)
def get_sample_loggers(self, run=1) -> tp.List[ResultsLogger]:
k = self.model.n_zones
base_dir = self.path_results / f'K{k}'
base_dir.mkdir(exist_ok=True)
params_path = base_dir / f'stats_K{k}_{run}.txt'
areas_path = base_dir / f'areas_K{k}_{run}.txt'
likelihood_path = base_dir / f'likelihood_K{k}_{run}.h5'

fi = 'K{K}'.format(K=self.config['model']['areas'])
run = '_{run}'.format(run=run)
pth = self.path_results / fi
ext = '.txt'
gt_pth = pth / 'ground_truth'
sample_loggers = [
ParametersCSVLogger(params_path, self.data),
AreasLogger(areas_path, self.data),
]
if not self.config['mcmc']['sample_from_prior']:
sample_loggers.append(LikelihoodLogger(likelihood_path, self.data))

paths = {'parameters': pth / ('stats_' + fi + run + ext),
'areas': pth / ('areas_' + fi + run + ext),
'gt': gt_pth / ('stats' + ext),
'gt_areas': gt_pth / ('areas' + ext)}
return sample_loggers

pth.mkdir(exist_ok=True)
""" OLD METHODS FOR COMPATIBILITY """

if self.data.is_simulated:
self.eval_ground_truth()
gt_pth.mkdir(exist_ok=True)
def mc_eval_ground_truth(self):
pass

samples2file(self.samples, self.data, self.config, paths)
def save_samples(self, run=1):
pass

def log_statistics(self):
# TODO log during MCMC run
self.sampler.print_statistics(self.samples)
pass
Loading

0 comments on commit b704959

Please sign in to comment.