Merge pull request #64 from eth-cscs/release-0.5.7
Release 0.5.7
statrita2004 authored Jan 30, 2020
2 parents 1d611c1 + 2c18f35 commit 4d413ed
Showing 35 changed files with 2,382 additions and 325 deletions.
9 changes: 6 additions & 3 deletions .travis.yml
@@ -1,13 +1,15 @@
dist: trusty
dist: xenial
language: python
python:
- '3.4'
- '3.7'
addons:
apt:
sources:
- deadsnakes
packages:
- gfortran
- libboost-random-dev
- libpython3.4-dev
- python3.7-dev
- python3-numpy
- swig
- libmpich-dev
@@ -16,6 +18,7 @@ install:
- pip install -r requirements.txt
- pip install -r requirements/backend-mpi.txt
- pip install -r requirements/backend-spark.txt
- pip install -r requirements/optional-requirements.txt
script:
- make test
before_deploy:
14 changes: 9 additions & 5 deletions README.md
@@ -13,7 +13,8 @@ algorithms and other likelihood-free inference schemes. It presently includes:
* ABCsubsim (ABC using subset simulation)
* PMC (Population Monte Carlo) using approximations of likelihood functions
* Random Forest Model Selection Scheme
* Semi-automatic summary selection
* Semi-automatic summary selection (with neural networks)
* Summary selection using distance learning (with neural networks)
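The two summary-selection entries added above are implemented on top of new low-level training helpers introduced in this release in `abcpy/NN_utilities/algorithms.py` (shown in full further down in this diff). As orientation only, here is a minimal sketch of how the contrastive distance-learning helper might be called; the toy data, the small embedding network, and the assumption that the helper accepts NumPy arrays and a boolean similarity matrix are illustrative and not taken from the release itself:

```python
import numpy as np
import torch.nn as nn

from abcpy.NN_utilities.algorithms import contrastive_training

# Hypothetical toy problem: 50 simulated datasets of dimension 5, generated from
# 2-dimensional parameters; pairs whose parameters are close count as "similar".
samples = np.random.randn(50, 5).astype(np.float32)
parameters = np.random.randn(50, 2)
pairwise_dist = np.linalg.norm(parameters[:, None, :] - parameters[None, :, :], axis=-1)
similarity_set = pairwise_dist < np.quantile(pairwise_dist, 0.1)  # assumed boolean matrix format

# Small embedding network mapping a simulated dataset to a 2-dimensional learned summary.
embedding_net = nn.Sequential(nn.Linear(5, 16), nn.ReLU(), nn.Linear(16, 2))

embedding_net = contrastive_training(samples, similarity_set, embedding_net,
                                     cuda=False, batch_size=16, n_epochs=10)
```

The trained `embedding_net` then plays the role of a learned summary statistic inside an ABC distance.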

ABCpy addresses the needs of domain scientists and data
scientists by providing
@@ -26,10 +27,9 @@ scientists by providing
# Documentation
For more information, check out the

* [Documentation](http://abcpy.readthedocs.io/en/v0.5.6)
* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.6/examples) directory and
* [Reference](http://abcpy.readthedocs.io/en/v0.5.6/abcpy.html)

* [Documentation](http://abcpy.readthedocs.io/en/v0.5.7)
* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.7/examples) directory and
* [Reference](http://abcpy.readthedocs.io/en/v0.5.7/abcpy.html)

Further, we provide a
[collection of models](https://github.com/eth-cscs/abcpy-models) for which ABCpy
@@ -64,6 +64,10 @@ BibTex reference.

Publications in which ABCpy was applied:

* L. Pacchiardi, P. Künzli, M. Schöngens, B. Chopard, R. Dutta, "Distance-Learning for Approximate Bayesian
Computation to Model a Volcanic Eruption", 2020, Sankhya B, ISSN 0976-8394,
[DOI: 10.1007/s13571-019-00208-8](https://doi.org/10.1007/s13571-019-00208-8).

* R. Dutta, J. P. Onnela, A. Mira, "Bayesian Inference of Spreading Processes
on Networks", 2018, Proc. R. Soc. A, 474(2215), 20180129.

2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
0.5.6
0.5.7
File renamed without changes.
174 changes: 174 additions & 0 deletions abcpy/NN_utilities/algorithms.py
@@ -0,0 +1,174 @@
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.optim import lr_scheduler
    from torch.utils.data import Dataset
    from abcpy.NN_utilities.datasets import Similarities, SiameseSimilarities, TripletSimilarities, \
        ParameterSimulationPairs
    from abcpy.NN_utilities.losses import ContrastiveLoss, TripletLoss
    from abcpy.NN_utilities.networks import SiameseNet, TripletNet
    from abcpy.NN_utilities.trainer import fit
except ImportError:
    has_torch = False
else:
    has_torch = True


def contrastive_training(samples, similarity_set, embedding_net, cuda, batch_size=16, n_epochs=200,
                         positive_weight=None, load_all_data_GPU=False, margin=1., lr=None, optimizer=None,
                         scheduler=None, start_epoch=0, verbose=False, optimizer_kwargs={}, scheduler_kwargs={},
                         loader_kwargs={}):
    """Implements the algorithm for the contrastive distance-learning training of a neural network; it needs to be
    provided with a set of samples and the corresponding similarity matrix."""

    # If the dataset is small enough, we can speed up training by loading it all on the GPU at the beginning, by using
    # load_all_data_GPU=True. This may crash if the dataset is too large. Note that in some cases using only the CPU
    # may still be quicker.

    # Do all the setups

    # need to use the Similarities and SiameseSimilarities datasets

    similarities_dataset = Similarities(samples, similarity_set, "cuda" if cuda and load_all_data_GPU else "cpu")
    pairs_dataset = SiameseSimilarities(similarities_dataset, positive_weight=positive_weight)

    if cuda:
        if load_all_data_GPU:
            loader_kwargs_2 = {'num_workers': 0, 'pin_memory': False}
        else:
            loader_kwargs_2 = {'num_workers': 1, 'pin_memory': True}
    else:
        loader_kwargs_2 = {}

    loader_kwargs.update(loader_kwargs_2)

    pairs_train_loader = torch.utils.data.DataLoader(pairs_dataset, batch_size=batch_size, shuffle=True,
                                                     **loader_kwargs)

    model_contrastive = SiameseNet(embedding_net)

    if cuda:
        model_contrastive.cuda()
    loss_fn = ContrastiveLoss(margin)

    if lr is None:
        lr = 1e-3

    if optimizer is None:  # default value
        optimizer = optim.Adam(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
    else:
        optimizer = optimizer(embedding_net.parameters(), lr=lr, **optimizer_kwargs)

    if scheduler is None:  # default value, i.e. a dummy scheduler
        scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1, last_epoch=-1)
    else:
        scheduler = scheduler(optimizer, **scheduler_kwargs)

    # now train:
    fit(pairs_train_loader, model_contrastive, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=start_epoch)

    return embedding_net


def triplet_training(samples, similarity_set, embedding_net, cuda, batch_size=16, n_epochs=400,
                     load_all_data_GPU=False, margin=1., lr=None, optimizer=None, scheduler=None, start_epoch=0,
                     verbose=False, optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
    """Implements the algorithm for the triplet distance-learning training of a neural network; it needs to be
    provided with a set of samples and the corresponding similarity matrix."""

    # If the dataset is small enough, we can speed up training by loading it all on the GPU at the beginning, by using
    # load_all_data_GPU=True. This may crash if the dataset is too large. Note that in some cases using only the CPU
    # may still be quicker.
    # Do all the setups

    # need to use the Similarities and TripletSimilarities datasets

    similarities_dataset = Similarities(samples, similarity_set, "cuda" if cuda and load_all_data_GPU else "cpu")
    triplets_dataset = TripletSimilarities(similarities_dataset)

    if cuda:
        if load_all_data_GPU:
            loader_kwargs_2 = {'num_workers': 0, 'pin_memory': False}
        else:
            loader_kwargs_2 = {'num_workers': 1, 'pin_memory': True}
    else:
        loader_kwargs_2 = {}

    loader_kwargs.update(loader_kwargs_2)

    triplets_train_loader = torch.utils.data.DataLoader(triplets_dataset, batch_size=batch_size, shuffle=True,
                                                        **loader_kwargs)

    model_triplet = TripletNet(embedding_net)

    if cuda:
        model_triplet.cuda()
    loss_fn = TripletLoss(margin)

    if lr is None:
        lr = 1e-3

    if optimizer is None:  # default value
        optimizer = optim.Adam(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
    else:
        optimizer = optimizer(embedding_net.parameters(), lr=lr, **optimizer_kwargs)

    if scheduler is None:  # default value, i.e. a dummy scheduler
        scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1, last_epoch=-1)
    else:
        scheduler = scheduler(optimizer, **scheduler_kwargs)

    # now train:
    fit(triplets_train_loader, model_triplet, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=start_epoch)

    return embedding_net


def FP_nn_training(samples, target, embedding_net, cuda, batch_size=1, n_epochs=50, load_all_data_GPU=False,
                   lr=1e-3, optimizer=None, scheduler=None, start_epoch=0, verbose=False, optimizer_kwargs={},
                   scheduler_kwargs={}, loader_kwargs={}):
    """Implements the algorithm for training a neural network by regressing the values of the parameters
    on the corresponding simulation outcomes; it is effectively training with a mean squared error loss. It needs to
    be provided with a set of samples and the corresponding parameters that generated the samples. Note that in this
    case the network has to have the same output size as the number of parameters, as the learned summary statistic
    will have the same dimension as the parameter vector."""

    # If the dataset is small enough, we can speed up training by loading it all on the GPU at the beginning, by using
    # load_all_data_GPU=True. This may crash if the dataset is too large. Note that in some cases using only the CPU
    # may still be quicker.

    # Do all the setups

    dataset_FP_nn = ParameterSimulationPairs(samples, target, "cuda" if cuda and load_all_data_GPU else "cpu")

    if cuda:
        if load_all_data_GPU:
            loader_kwargs_2 = {'num_workers': 0, 'pin_memory': False}
        else:
            loader_kwargs_2 = {'num_workers': 1, 'pin_memory': True}
    else:
        loader_kwargs_2 = {}

    loader_kwargs.update(loader_kwargs_2)

    data_loader_FP_nn = torch.utils.data.DataLoader(dataset_FP_nn, batch_size=batch_size, shuffle=True, **loader_kwargs)

    if cuda:
        embedding_net.cuda()
    loss_fn = nn.MSELoss(reduction="mean")

    if optimizer is None:  # default value
        optimizer = optim.Adam(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
    else:
        optimizer = optimizer(embedding_net.parameters(), lr=lr, **optimizer_kwargs)

    if scheduler is None:  # default value, i.e. a dummy scheduler
        scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1, last_epoch=-1)
    else:
        scheduler = scheduler(optimizer, **scheduler_kwargs)

    # now train:
    fit(data_loader_FP_nn, embedding_net, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=start_epoch)

    return embedding_net
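Analogously, a minimal sketch of the regression-based helper `FP_nn_training` defined above (the semi-automatic summary-selection route); the toy data and network are again hypothetical, and it is assumed that the underlying `ParameterSimulationPairs` dataset accepts NumPy arrays:

```python
import numpy as np
import torch.nn as nn

from abcpy.NN_utilities.algorithms import FP_nn_training

# Hypothetical toy problem: 100 simulations of dimension 10, each generated from 3 parameters.
samples = np.random.randn(100, 10).astype(np.float32)
parameters = np.random.randn(100, 3).astype(np.float32)

# As noted in the docstring above, the network output size must equal the number
# of parameters, so the learned summaries are 3-dimensional here.
embedding_net = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 3))

embedding_net = FP_nn_training(samples, parameters, embedding_net, cuda=False,
                               batch_size=16, n_epochs=10)
```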
