From de0d0684c8b5e821bc8513aed7c5600c7b6ecb86 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 31 Dec 2016 14:45:34 +0100 Subject: [PATCH 01/15] switch to joblib parallel processing This is included in scikit-learn. It does work stealing job balancing for us that helps to use the full processor power. The absolute biggest advantage of this is though that I can now include print/exception errors for debugging in the `conf_dist_function`. --- .../analysis/encore/confdistmatrix.py | 119 +++++------------- 1 file changed, 34 insertions(+), 85 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index d7db931a9f3..fb5d1781910 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -44,6 +44,8 @@ class to compute an RMSD matrix in such a way is also available. from time import sleep import logging +from sklearn.externals.joblib import Parallel, delayed + from ...core.universe import Universe from ..align import rotation_matrix @@ -55,9 +57,9 @@ class to compute an RMSD matrix in such a way is also available. def conformational_distance_matrix(ensemble, - conf_dist_function, selection="", - superimposition_selection="", ncores=1, pairwise_align=True, - mass_weighted=True, metadata=True, *args, **kwargs): + conf_dist_function, selection="", + superimposition_selection="", ncores=1, pairwise_align=True, + mass_weighted=True, metadata=True, verbose=False): """ Run the conformational distance matrix calculation. args and kwargs are passed to conf_dist_function. @@ -160,73 +162,30 @@ def conformational_distance_matrix(ensemble, else: subset_masses = None - # matsize: number of elements of the triangular matrix, diagonal - # elements included. - matsize = framesn * (framesn + 1) / 2 - - # Calculate the number of matrix elements that each core has to - # calculate as equally as possible. - if ncores > matsize: - ncores = matsize - runs_per_worker = [matsize / int(ncores) for x in range(ncores)] - unfair_work = matsize % ncores - for i in range(unfair_work): - runs_per_worker[i] += 1 - - # Splice the matrix in ncores segments. Calculate the first and the - # last (i,j) matrix elements of the slices that will be assigned to - # each worker. Each of them will proceed in a column-then-row order - # (e.g. 0,0 1,0 1,1 2,0 2,1 2,2 ... ) - i = 0 - a = [0, 0] - b = [0, 0] - tasks_per_worker = [] - for n,r in enumerate(runs_per_worker): - while i * (i - 1) / 2 < np.sum(runs_per_worker[:n + 1]): - i += 1 - b = [i - 2, - np.sum(runs_per_worker[0:n + 1]) - (i - 2) * (i - 1) / 2 - 1] - tasks_per_worker.append((tuple(a), tuple(b))) - if b[0] == b[1]: - a[0] = b[0] + 1 - a[1] = 0 - else: - a[0] = b[0] - a[1] = b[1] + 1 - # Allocate for output matrix + matsize = framesn * (framesn + 1) / 2 distmat = Array(c_float, matsize) - # Prepare progress bar stuff and run it - pbar = AnimatedProgressBar(end=matsize, width=80) - partial_counters = [RawValue('i', 0) for i in range(ncores)] # Initialize workers. Simple worker doesn't perform fitting, # fitter worker does. - - workers = [Process(target=conf_dist_function, args=( - tasks_per_worker[i], + indices = trm_indeces((0, 0), (framesn - 1, framesn - 1)) + Parallel(n_jobs=ncores, verbose=verbose)(delayed(conf_dist_function)( + element, rmsd_coordinates, distmat, masses, fitting_coordinates, subset_masses, - partial_counters[i], - args, - kwargs)) for i in range(ncores)] + masses) for element in indices) - # Start & join the workers - for w in workers: - w.start() - for w in workers: - w.join() # When the workers have finished, return a TriangularMatrix object return TriangularMatrix(distmat, metadata=metadata) def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, - fit_masses=None, pbar_counter=None, *args, **kwargs): + fit_masses=None, pbar_counter=None, *args, **kwargs): ''' RMSD Matrix calculator @@ -264,48 +223,38 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, fit_masses : numpy.array Array of atomic masses, having the same order as the fit_coords array - - pbar_counter : multiprocessing.RawValue - Thread-safe shared value. This counter is updated at - every cycle and used to evaluate the progress of - each worker in a parallel calculation. ''' - + i, j = tasks if fit_coords is None and fit_masses is None: - for i, j in trm_indeces(tasks[0], tasks[1]): - summasses = np.sum(masses) - rmsdmat[(i + 1) * i / 2 + j] = PureRMSD(coords[i].astype(np.float64), - coords[j].astype(np.float64), - coords[j].shape[0], - masses, - summasses) + summasses = np.sum(masses) + rmsdmat[(i + 1) * i / 2 + j] = PureRMSD(coords[i].astype(np.float64), + coords[j].astype(np.float64), + coords[j].shape[0], + masses, + summasses) elif fit_coords is not None and fit_coords is not None: - for i, j in trm_indeces(tasks[0], tasks[1]): - summasses = np.sum(masses) - subset_weights = np.asarray(fit_masses) / np.mean(fit_masses) - com_i = np.average(fit_coords[i], axis=0, - weights=fit_masses) - translated_i = coords[i] - com_i - subset1_coords = fit_coords[i] - com_i - com_j = np.average(fit_coords[j], axis=0, - weights=fit_masses) - translated_j = coords[j] - com_j - subset2_coords = fit_coords[j] - com_j - rotamat = rotation_matrix(subset1_coords, subset2_coords, - subset_weights)[0] - rotated_i = np.transpose(np.dot(rotamat, np.transpose(translated_i))) - rmsdmat[(i + 1) * i / 2 + j] = PureRMSD( - rotated_i.astype(np.float64), translated_j.astype(np.float64), - coords[j].shape[0], masses, summasses) - + summasses = np.sum(masses) + subset_weights = np.asarray(fit_masses) / np.mean(fit_masses) + com_i = np.average(fit_coords[i], axis=0, + weights=fit_masses) + translated_i = coords[i] - com_i + subset1_coords = fit_coords[i] - com_i + com_j = np.average(fit_coords[j], axis=0, + weights=fit_masses) + translated_j = coords[j] - com_j + subset2_coords = fit_coords[j] - com_j + rotamat = rotation_matrix(subset1_coords, subset2_coords, + subset_weights)[0] + rotated_i = np.transpose(np.dot(rotamat, np.transpose(translated_i))) + rmsdmat[(i + 1) * i / 2 + j] = PureRMSD( + rotated_i.astype(np.float64), translated_j.astype(np.float64), + coords[j].shape[0], masses, summasses) else: raise TypeError("Both fit_coords and fit_masses must be specified \ if one of them is given") - if pbar_counter is not None: - pbar_counter.value += 1 def pbar_updater(pbar, pbar_counters, max_val, update_interval=0.2): '''Method that updates and prints the progress bar, upon polling From 74be2a21c37f3f0f6429799ed70b625e0036f310 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 31 Dec 2016 14:53:46 +0100 Subject: [PATCH 02/15] remove multiprocessing completely now we only rely on numpy functions and joblib --- package/MDAnalysis/analysis/encore/confdistmatrix.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index fb5d1781910..ec65757d24e 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -36,8 +36,6 @@ class to compute an RMSD matrix in such a way is also available. """ import numpy as np -from multiprocessing import Process, Array, RawValue -from ctypes import c_float from getpass import getuser from socket import gethostname from datetime import datetime @@ -164,7 +162,7 @@ def conformational_distance_matrix(ensemble, # Allocate for output matrix matsize = framesn * (framesn + 1) / 2 - distmat = Array(c_float, matsize) + distmat = np.empty(matsize, np.float64) # Initialize workers. Simple worker doesn't perform fitting, @@ -228,8 +226,8 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, if fit_coords is None and fit_masses is None: summasses = np.sum(masses) - rmsdmat[(i + 1) * i / 2 + j] = PureRMSD(coords[i].astype(np.float64), - coords[j].astype(np.float64), + rmsdmat[(i + 1) * i / 2 + j] = PureRMSD(coords[i], + coords[j], coords[j].shape[0], masses, summasses) From eb7bf3bbba4ae5a79056c24e9aba10309fb94dcd Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 31 Dec 2016 14:54:08 +0100 Subject: [PATCH 03/15] refactor TriangularMatrix This gives the initialization more freedom. We can have more types to choose from and the metadata can be passed in as a dict and still be correctly handled. --- package/MDAnalysis/analysis/encore/utils.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index 4b3539843a6..8416316c9b2 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -44,34 +44,36 @@ def __init__(self, size, metadata=None, loadfile=None): Parameters ---------- - size : int or multiprocessing.SyncrhonizeArray + size : int / array_like Size of the matrix (number of rows or columns). If an array is provided instead, the size of the triangular matrix will be calculated and the array copied as the matrix elements. Otherwise, the matrix is just initialized to zero. - metadata : dict or None Metadata dictionary. Used to generate the metadata attribute. - loadfile : str or None Load the matrix from this file. All the attributes and data will be determined by the matrix file itself (i.e. metadata will be ignored); size has to be provided though. """ - self.metadata = metadata + if isinstance(metadata, dict): + self.metadata = np.array(metadata.items(), dtype=object) + else: + self.metadata = metadata + self.size = size if loadfile: self.loadz(loadfile) - return - if type(size) == int: + elif isinstance(size, int): self.size = size self._elements = np.zeros((size + 1) * size / 2, dtype=np.float64) - return - if type(size) == SynchronizedArray: + elif isinstance(size, SynchronizedArray): self._elements = np.array(size.get_obj(), dtype=np.float64) self.size = int((np.sqrt(1 + 8 * len(size)) - 1) / 2) - return + elif isinstance(size, np.ndarray): + self._elements = size + self.size = int((np.sqrt(1 + 8 * len(size)) - 1) / 2) else: raise TypeError From a0c545582ccb819e238cbda4445d95ccb4bd0cd9 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 31 Dec 2016 14:58:51 +0100 Subject: [PATCH 04/15] remove unused progressbar code --- .../analysis/encore/confdistmatrix.py | 61 +++---------------- package/MDAnalysis/analysis/encore/utils.py | 20 ------ 2 files changed, 10 insertions(+), 71 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index ec65757d24e..7a44e2e7bc9 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -49,8 +49,7 @@ class to compute an RMSD matrix in such a way is also available. from ..align import rotation_matrix from .cutils import PureRMSD -from .utils import TriangularMatrix, trm_indeces, \ - AnimatedProgressBar +from .utils import TriangularMatrix, trm_indeces @@ -254,40 +253,6 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, if one of them is given") -def pbar_updater(pbar, pbar_counters, max_val, update_interval=0.2): - '''Method that updates and prints the progress bar, upon polling - progress status from workers. - - Parameters - ---------- - - pbar : encore.utils.AnimatedProgressBar object - Progress bar object - - pbar_counters : list of multiprocessing.RawValue - List of counters. Each worker is given a counter, which is updated - at every cycle. In this way the _pbar_updater process can - asynchronously fetch progress reports. - - max_val : int - Total number of matrix elements to be calculated - - update_interval : float - Number of seconds between progress bar updates - - ''' - - val = 0 - while val < max_val: - val = 0 - for c in pbar_counters: - val += c.value - pbar.update(val) - pbar.show_progress() - sleep(update_interval) - - - def get_distance_matrix(ensemble, selection="name CA", load_matrix=None, @@ -296,6 +261,7 @@ def get_distance_matrix(ensemble, superimposition_subset="name CA", mass_weighted=True, ncores=1, + verbose=False, *conf_dist_args, **conf_dist_kwargs): """ @@ -316,34 +282,28 @@ def get_distance_matrix(ensemble, Parameters ---------- - ensemble : Universe - selection : str Atom selection string in the MDAnalysis format. Default is "name CA" - load_matrix : str, optional Load similarity/dissimilarity matrix from numpy binary file instead of calculating it (default is None). A filename is required. - save_matrix : bool, optional Save calculated matrix as numpy binary file (default is None). A filename is required. - superimpose : bool, optional Whether to superimpose structures before calculating distance (default is True). - superimposition_subset : str, optional Group for superimposition using MDAnalysis selection syntax (default is CA atoms: "name CA") - mass_weighted : bool, optional calculate a mass-weighted RMSD (default is True). If set to False the superimposition will also not be mass-weighted. - ncores : int, optional Maximum number of cores to be used (default is 1) + verbose : bool, optional + print progress Returns ------- @@ -393,13 +353,12 @@ def get_distance_matrix(ensemble, # Use superimposition subset, if necessary. If the pairwise alignment # is not required, it will not be performed anyway. confdistmatrix = conformational_distance_matrix(ensemble, - conf_dist_function=set_rmsd_matrix_elements, - selection=selection, - pairwise_align=superimpose, - mass_weighted=mass_weighted, - ncores=ncores, - *conf_dist_args, - kwargs=conf_dist_kwargs) + conf_dist_function=set_rmsd_matrix_elements, + selection=selection, + pairwise_align=superimpose, + mass_weighted=mass_weighted, + ncores=ncores, + verbose=verbose) logging.info(" Done!") diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index 8416316c9b2..73c94c5160f 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -380,26 +380,6 @@ def update(self, progress): self.progress = 100 -class AnimatedProgressBar(ProgressBar): - """Extends ProgressBar to allow you to use it straighforward on a script. - Accepts an extra keyword argument named `stdout` - (by default use sys.stdout). - The progress status may be send to any file-object. - """ - - def __init__(self, *args, **kwargs): - super(AnimatedProgressBar, self).__init__(*args, **kwargs) - self.stdout = kwargs.get('stdout', sys.stdout) - - def show_progress(self): - if hasattr(self.stdout, 'isatty') and self.stdout.isatty(): - self.stdout.write('\r') - else: - self.stdout.write('\n') - self.stdout.write(str(self)) - self.stdout.flush() - - def trm_indeces(a, b): """ Generate (i,j) indeces of a triangular matrix, between elements a and b. From 91e3a2a95547a006710c41efa6fb3eef64139c3b Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 31 Dec 2016 15:04:34 +0100 Subject: [PATCH 05/15] use n_jobs instead of ncores --- .../analysis/encore/confdistmatrix.py | 21 +++++---------- package/MDAnalysis/analysis/encore/utils.py | 19 ++++++------- .../MDAnalysisTests/analysis/test_encore.py | 27 +++++++++---------- 3 files changed, 30 insertions(+), 37 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index 7a44e2e7bc9..b7f862a8d4a 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -55,7 +55,7 @@ class to compute an RMSD matrix in such a way is also available. def conformational_distance_matrix(ensemble, conf_dist_function, selection="", - superimposition_selection="", ncores=1, pairwise_align=True, + superimposition_selection="", n_jobs=1, pairwise_align=True, mass_weighted=True, metadata=True, verbose=False): """ Run the conformational distance matrix calculation. @@ -84,9 +84,9 @@ def conformational_distance_matrix(ensemble, Whether to build a metadata dataset for the calculated matrix. Default is True. - ncores : int + n_jobs : int Number of cores to be used for parallel calculation - Default is 1. + Default is 1. -1 uses all available cores Returns ------- @@ -96,13 +96,6 @@ def conformational_distance_matrix(ensemble, """ - # Decide how many cores have to be used. Since the main process is - # stopped while the workers do their job, ncores workers will be - # spawned. - - if ncores < 1: - ncores = 1 - # framesn: number of frames framesn = len(ensemble.trajectory.timeseries( ensemble.select_atoms(selection), format='fac')) @@ -167,7 +160,7 @@ def conformational_distance_matrix(ensemble, # Initialize workers. Simple worker doesn't perform fitting, # fitter worker does. indices = trm_indeces((0, 0), (framesn - 1, framesn - 1)) - Parallel(n_jobs=ncores, verbose=verbose)(delayed(conf_dist_function)( + Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(conf_dist_function)( element, rmsd_coordinates, distmat, @@ -260,7 +253,7 @@ def get_distance_matrix(ensemble, superimpose=True, superimposition_subset="name CA", mass_weighted=True, - ncores=1, + n_jobs=1, verbose=False, *conf_dist_args, **conf_dist_kwargs): @@ -300,7 +293,7 @@ def get_distance_matrix(ensemble, mass_weighted : bool, optional calculate a mass-weighted RMSD (default is True). If set to False the superimposition will also not be mass-weighted. - ncores : int, optional + n_jobs : int, optional Maximum number of cores to be used (default is 1) verbose : bool, optional print progress @@ -357,7 +350,7 @@ def get_distance_matrix(ensemble, selection=selection, pairwise_align=superimpose, mass_weighted=mass_weighted, - ncores=ncores, + n_jobs=n_jobs, verbose=verbose) logging.info(" Done!") diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index 73c94c5160f..bff523826f1 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -22,6 +22,7 @@ from six.moves import range from multiprocessing.sharedctypes import SynchronizedArray from multiprocessing import Process, Manager +from sklearn.externals.joblib import cpu_count import numpy as np import sys import MDAnalysis as mda @@ -179,9 +180,6 @@ def __imul__(self, scalar): self._elements *= scalar return self - - - __rmul__ = __mul__ def __str__(self): @@ -196,7 +194,7 @@ class ParallelCalculation(object): Attributes ---------- - ncores : int + n_jobs : int Number of cores to be used for parallel calculation function : callable object @@ -217,13 +215,13 @@ class ParallelCalculation(object): len(kwargs). """ - def __init__(self, ncores, function, args=None, kwargs=None): + def __init__(self, n_jobs, function, args=None, kwargs=None): """ Class constructor. Parameters ---------- - ncores : int + n_jobs : int Number of cores to be used for parallel calculation function : object that supports __call__, as functions @@ -239,7 +237,10 @@ class description. """ # args[i] should be a list of args, one for each run - self.ncores = ncores + self.n_jobs = n_jobs + if self.n_jobs == -1: + self.n_jobs = cpu_count() + self.functions = function if not hasattr(self.functions, '__iter__'): self.functions = [self.functions]*len(args) @@ -296,7 +297,7 @@ def run(self): is the return of function(\*args[3], \*\*kwargs[3]). """ results_list = [] - if self.ncores == 1: + if self.n_jobs == 1: for i in range(self.nruns): results_list.append((i, self.functions[i](*self.args[i], **self.kwargs[i]))) @@ -306,7 +307,7 @@ def run(self): results = manager.Queue() workers = [Process(target=self.worker, args=(q, results)) for i in - range(self.ncores)] + range(self.n_jobs)] for i in range(self.nruns): q.put(i) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index c831aba0599..0191db1ef87 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -98,24 +98,24 @@ def test_triangular_matrix(): assert_equal(triangular_matrix[0,1], expected_value, err_msg="Data error in TriangularMatrix: read/write are not consistent") - assert_equal(triangular_matrix[0,1], triangular_matrix[1,0], + assert_equal(triangular_matrix[0,1], triangular_matrix[1,0], err_msg="Data error in TriangularMatrix: matrix non symmetrical") triangular_matrix.savez(filename) triangular_matrix_2 = encore.utils.TriangularMatrix(size = size, loadfile = filename) - assert_equal(triangular_matrix_2[0,1], expected_value, + assert_equal(triangular_matrix_2[0,1], expected_value, err_msg="Data error in TriangularMatrix: loaded matrix non symmetrical") triangular_matrix_3 = encore.utils.TriangularMatrix(size = size) triangular_matrix_3.loadz(filename) - assert_equal(triangular_matrix_3[0,1], expected_value, + assert_equal(triangular_matrix_3[0,1], expected_value, err_msg="Data error in TriangularMatrix: loaded matrix non symmetrical") incremented_triangular_matrix = triangular_matrix + scalar assert_equal(incremented_triangular_matrix[0,1], expected_value + scalar, err_msg="Error in TriangularMatrix: addition of scalar gave\ inconsistent results") - + triangular_matrix += scalar assert_equal(triangular_matrix[0,1], expected_value + scalar, err_msg="Error in TriangularMatrix: addition of scalar gave\ @@ -140,29 +140,29 @@ def function(x): arguments = [tuple([i]) for i in np.arange(0,100)] - parallel_calculation = encore.utils.ParallelCalculation(function = function, - ncores = 4, - args = arguments) + parallel_calculation = encore.utils.ParallelCalculation(function=function, + n_jobs=4, + args=arguments) results = parallel_calculation.run() for i,r in enumerate(results): assert_equal(r[1], arguments[i][0]**2, err_msg="Unexpeted results from ParallelCalculation") - def test_rmsd_matrix_with_superimposition(self): + def test_rmsd_matrix_with_superimposition(self): conf_dist_matrix = encore.confdistmatrix.conformational_distance_matrix(self.ens1, encore.confdistmatrix.set_rmsd_matrix_elements, selection = "name CA", pairwise_align = True, mass_weighted = True, - ncores = 1) + n_jobs = 1) reference = rms.RMSD(self.ens1, select = "name CA") reference.run() for i,rmsd in enumerate(reference.rmsd): assert_almost_equal(conf_dist_matrix[0,i], rmsd[2], decimal=3, - err_msg = "calculated RMSD values differ from the reference implementation") + err_msg = "calculated RMSD values differ from the reference implementation") def test_rmsd_matrix_without_superimposition(self): selection_string = "name CA" @@ -178,7 +178,7 @@ def test_rmsd_matrix_without_superimposition(self): selection = selection_string, pairwise_align = False, mass_weighted = True, - ncores = 1) + n_jobs = 1) print (repr(confdist_matrix.as_array()[0,:])) assert_almost_equal(confdist_matrix.as_array()[0,:], reference_rmsd, decimal=3, @@ -264,7 +264,7 @@ def test_ces(self): expected_value = 0.51 assert_almost_equal(result_value, expected_value, decimal=2, err_msg="Unexpected value for Cluster Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - + @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") def test_dres_to_self(self): @@ -295,7 +295,7 @@ def test_dres_without_superimposition(self): expected_value = 0.68 assert_almost_equal(result_value, expected_value, decimal=1, err_msg="Unexpected value for Dim. reduction Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - + def test_ces_convergence(self): expected_values = [0.3443593, 0.1941854, 0.06857104, 0.] results = encore.ces_convergence(self.ens1, 5) @@ -793,4 +793,3 @@ def test_dimensionality_reduction_two_different_methods(self): method=[encore.StochasticProximityEmbeddingNative(dims[0]), encore.PrincipalComponentAnalysis(dims[1])]) assert_equal(coordinates[1].shape[0], dims[1]) - From 0f3dd34470d1dafdbdbcd818ecf77dbccad0cf3d Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 31 Dec 2016 15:10:44 +0100 Subject: [PATCH 06/15] update docs --- .../analysis/encore/confdistmatrix.py | 9 +---- package/MDAnalysis/analysis/encore/utils.py | 36 ++++++++----------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index b7f862a8d4a..9bb7e700b96 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -63,34 +63,27 @@ def conformational_distance_matrix(ensemble, Parameters ---------- - ensemble : Universe object Universe object for which the conformational distance matrix will be computed. - conf_dist_function : function object Function that fills the matrix with conformational distance values. See set_rmsd_matrix_elements for an example. - pairwise_align : bool Whether to perform pairwise alignment between conformations. Default is True (do the superimposition) - mass_weighted : bool Whether to perform mass-weighted superimposition and metric calculation. Default is True. - metadata : bool Whether to build a metadata dataset for the calculated matrix. Default is True. - n_jobs : int Number of cores to be used for parallel calculation Default is 1. -1 uses all available cores Returns ------- - conf_dist_matrix : encore.utils.TriangularMatrix object Conformational distance matrix in triangular representation. @@ -294,7 +287,7 @@ def get_distance_matrix(ensemble, calculate a mass-weighted RMSD (default is True). If set to False the superimposition will also not be mass-weighted. n_jobs : int, optional - Maximum number of cores to be used (default is 1) + Maximum number of cores to be used (default is 1). If -1 use all cores. verbose : bool, optional print progress diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index bff523826f1..c5f1bae045f 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -193,44 +193,36 @@ class ParallelCalculation(object): Attributes ---------- - n_jobs : int - Number of cores to be used for parallel calculation - + Number of cores to be used for parallel calculation. If -1 use all + available cores. function : callable object - Function to be run in parallel. - + Function to be run in parallel. args : list of tuples - Each tuple contains the arguments that will be passed to - function(). This means that a call to function() is performed for - each tuple. function is called as function(\*args, \*\*kwargs). Runs - are distributed on the requested numbers of cores. - + Each tuple contains the arguments that will be passed to + function(). This means that a call to function() is performed for + each tuple. function is called as function(\*args, \*\*kwargs). Runs + are distributed on the requested numbers of cores. kwargs : list of dicts - Each tuple contains the named arguments that will be passed to - function, similarly as described for the args attribute. - + Each tuple contains the named arguments that will be passed to + function, similarly as described for the args attribute. nruns : int - Number of runs to be performed. Must be equal to len(args) and - len(kwargs). + Number of runs to be performed. Must be equal to len(args) and + len(kwargs). """ def __init__(self, n_jobs, function, args=None, kwargs=None): - """ Class constructor. - + """ Parameters ---------- - n_jobs : int - Number of cores to be used for parallel calculation - + Number of cores to be used for parallel calculation. If -1 use all + available cores. function : object that supports __call__, as functions function to be run in parallel. - args : list of tuples Arguments for function; see the ParallelCalculation class description. - kwargs : list of dicts or None kwargs for function; see the ParallelCalculation class description. From 47858ca1a57db2209d99e387fdef7af174f09e1b Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sun, 1 Jan 2017 16:47:09 +0100 Subject: [PATCH 07/15] deactivate tests in minimal build --- .../MDAnalysisTests/analysis/test_encore.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 0191db1ef87..e207712845a 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -40,6 +40,8 @@ class TestEncore(TestCase): @dec.skipif(parser_not_found('DCD'), 'DCD parser not available. Are you using python 3?') + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def setUp(self): # Create universe from templates defined in setUpClass self.ens1 = mda.Universe( @@ -149,6 +151,8 @@ def function(x): assert_equal(r[1], arguments[i][0]**2, err_msg="Unexpeted results from ParallelCalculation") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_rmsd_matrix_with_superimposition(self): conf_dist_matrix = encore.confdistmatrix.conformational_distance_matrix(self.ens1, encore.confdistmatrix.set_rmsd_matrix_elements, @@ -164,6 +168,8 @@ def test_rmsd_matrix_with_superimposition(self): assert_almost_equal(conf_dist_matrix[0,i], rmsd[2], decimal=3, err_msg = "calculated RMSD values differ from the reference implementation") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_rmsd_matrix_without_superimposition(self): selection_string = "name CA" selection = self.ens1.select_atoms(selection_string) @@ -249,6 +255,8 @@ def test_hes_align(self): assert_almost_equal(result_value, expected_value, decimal=-3, err_msg="Unexpected value for Harmonic Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_ces_to_self(self): results, details = \ encore.ces([self.ens1, self.ens1], @@ -258,6 +266,8 @@ def test_ces_to_self(self): assert_almost_equal(result_value, expected_value, err_msg="ClusteringEnsemble Similarity to itself not zero: {0:f}".format(result_value)) + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_ces(self): results, details = encore.ces([self.ens1, self.ens2]) result_value = results[0,1] @@ -267,6 +277,8 @@ def test_ces(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dres_to_self(self): results, details = encore.dres([self.ens1, self.ens1]) result_value = results[0,1] @@ -276,6 +288,8 @@ def test_dres_to_self(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dres(self): results, details = encore.dres([self.ens1, self.ens2], selection="name CA and resnum 1-10") result_value = results[0,1] @@ -285,6 +299,8 @@ def test_dres(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dres_without_superimposition(self): distance_matrix = encore.get_distance_matrix( encore.merge_universes([self.ens1, self.ens2]), @@ -296,6 +312,8 @@ def test_dres_without_superimposition(self): assert_almost_equal(result_value, expected_value, decimal=1, err_msg="Unexpected value for Dim. reduction Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_ces_convergence(self): expected_values = [0.3443593, 0.1941854, 0.06857104, 0.] results = encore.ces_convergence(self.ens1, 5) @@ -303,8 +321,11 @@ def test_ces_convergence(self): for i,ev in enumerate(expected_values): assert_almost_equal(ev, results[i], decimal=2, err_msg="Unexpected value for Clustering Ensemble similarity in convergence estimation") + @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dres_convergence(self): expected_values = [ 0.3, 0.] results = encore.dres_convergence(self.ens1, 10) @@ -325,6 +346,8 @@ def test_hes_error_estimation(self): err_msg="Unexpected standard daviation for bootstrapped samples in Harmonic Ensemble imilarity") @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_ces_error_estimation(self): expected_average = 0.03 expected_stdev = 0.31 @@ -366,6 +389,8 @@ def test_ces_error_estimation_ensemble_bootstrap(self): @dec.slow @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dres_error_estimation(self): average_upper_bound = 0.3 stdev_upper_bound = 0.2 @@ -430,6 +455,8 @@ def tearDownClass(cls): del cls.ens2_template @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_clustering_one_ensemble(self): cluster_collection = encore.cluster(self.ens1) expected_value = 7 @@ -437,6 +464,8 @@ def test_clustering_one_ensemble(self): err_msg="Unexpected results: {0}".format(cluster_collection)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_clustering_two_ensembles(self): cluster_collection = encore.cluster([self.ens1, self.ens2]) expected_value = 14 @@ -444,6 +473,8 @@ def test_clustering_two_ensembles(self): err_msg="Unexpected results: {0}".format(cluster_collection)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_clustering_three_ensembles_two_identical(self): cluster_collection = encore.cluster([self.ens1, self.ens2, self.ens1]) expected_value = 40 @@ -451,6 +482,8 @@ def test_clustering_three_ensembles_two_identical(self): err_msg="Unexpected result: {0}".format(cluster_collection)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_clustering_two_methods(self): cluster_collection = encore.cluster( [self.ens1], @@ -460,6 +493,8 @@ def test_clustering_two_methods(self): err_msg="Unexpected result: {0}".format(cluster_collection)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_clustering_AffinityPropagationNative_direct(self): method = encore.AffinityPropagationNative() distance_matrix = encore.get_distance_matrix(self.ens1) @@ -703,6 +738,8 @@ def tearDownClass(cls): del cls.ens2_template @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dimensionality_reduction_one_ensemble(self): dimension = 2 coordinates, details = encore.reduce_dimensionality(self.ens1) @@ -711,6 +748,8 @@ def test_dimensionality_reduction_one_ensemble(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dimensionality_reduction_two_ensembles(self): dimension = 2 coordinates, details = \ @@ -719,6 +758,8 @@ def test_dimensionality_reduction_two_ensembles(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dimensionality_reduction_three_ensembles_two_identical(self): coordinates, details = \ encore.reduce_dimensionality([self.ens1, self.ens2, self.ens1]) @@ -728,6 +769,8 @@ def test_dimensionality_reduction_three_ensembles_two_identical(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dimensionality_reduction_specified_dimension(self): dimension = 3 coordinates, details = encore.reduce_dimensionality( @@ -737,6 +780,8 @@ def test_dimensionality_reduction_specified_dimension(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dimensionality_reduction_SPENative_direct(self): dimension = 2 method = encore.StochasticProximityEmbeddingNative(dimension=dimension) @@ -773,6 +818,8 @@ def test_dimensionality_reduction_different_method(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow + @dec.skipif(module_not_found('sklearn'), + "Test skipped because sklearn is not available.") def test_dimensionality_reduction_two_methods(self): dims = [2,3] coordinates, details = \ From e7ddb3fd049c14cbca68ea16b7eb794ca90c362f Mon Sep 17 00:00:00 2001 From: Max Linke Date: Mon, 2 Jan 2017 22:30:03 +0100 Subject: [PATCH 08/15] use include guards for joblib --- .../analysis/encore/clustering/ClusteringMethod.py | 12 ++++++------ package/MDAnalysis/analysis/encore/confdistmatrix.py | 8 ++++++-- .../DimensionalityReductionMethod.py | 11 +++++------ package/MDAnalysis/analysis/encore/utils.py | 7 ++++++- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py index c8a91a4810a..b555558ef70 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py @@ -44,12 +44,12 @@ try: import sklearn.cluster except ImportError: - sklearn = None - msg = "sklearn.cluster could not be imported: some functionality will " \ - "not be available in encore.fit_clusters()" - warnings.warn(msg, category=ImportWarning) - logging.warn(msg) - del msg + sklearn = None + msg = "sklearn.cluster could not be imported: some functionality will " \ + "not be available in encore.fit_clusters()" + warnings.warn(msg, category=ImportWarning) + logging.warn(msg) + del msg def encode_centroid_info(clusters, cluster_centers_indices): diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index 9bb7e700b96..d821627799c 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -42,8 +42,6 @@ class to compute an RMSD matrix in such a way is also available. from time import sleep import logging -from sklearn.externals.joblib import Parallel, delayed - from ...core.universe import Universe from ..align import rotation_matrix @@ -51,6 +49,12 @@ class to compute an RMSD matrix in such a way is also available. from .cutils import PureRMSD from .utils import TriangularMatrix, trm_indeces +try: + from sklearn.externals.joblib import Parallel, delayed +except ImportError: + sklearn = None + import warnings + warnings.warn( "Couldn't import sklearn. Can't use conformational_distance_matrix", category=ImportWarning) def conformational_distance_matrix(ensemble, diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py index c579a3ea5fe..66ccf8c76dc 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py @@ -43,12 +43,11 @@ try: import sklearn.decomposition except ImportError: - sklearn = None - msg = "sklearn.decomposition could not be imported: some functionality will"\ - "not be available in encore.dimensionality_reduction()" - warnings.warn(msg, category=ImportWarning) - logging.warn(msg) - del msg + sklearn = None + import warnings + warnings.warn("sklearn.decomposition could not be imported: some " + "functionality will not be available in " + "encore.dimensionality_reduction()", category=ImportWarning) class DimensionalityReductionMethod (object): diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index c5f1bae045f..2fef4747015 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -22,12 +22,17 @@ from six.moves import range from multiprocessing.sharedctypes import SynchronizedArray from multiprocessing import Process, Manager -from sklearn.externals.joblib import cpu_count import numpy as np import sys import MDAnalysis as mda from ...coordinates.memory import MemoryReader +try: + from sklearn.externals.joblib import cpu_count +except ImportError: + sklearn = None + import warnings + warnings.warn("Couldn't import sklearn. Can't use ParallelCalculation", category=ImportWarning) class TriangularMatrix(object): """Triangular matrix class. This class is designed to provide a From 0d068da33ae3058488d82eb28f1e8e8e3835f840 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Wed, 4 Jan 2017 13:03:20 +0100 Subject: [PATCH 09/15] remove ProgressBar it isn't used anymore and there were license issues with it --- package/MDAnalysis/analysis/encore/utils.py | 54 --------------------- 1 file changed, 54 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index 2fef4747015..53453527959 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -324,60 +324,6 @@ def run(self): return tuple(sorted(results_list, key=lambda x: x[0])) -class ProgressBar(object): - """Handle and draw a progress barr. - From https://github.com/ikame/progressbar - """ - - def __init__(self, start=0, end=10, width=12, fill='=', blank='.', - format='[%(fill)s>%(blank)s] %(progress)s%%', - incremental=True): - super(ProgressBar, self).__init__() - - self.start = start - self.end = end - self.width = width - self.fill = fill - self.blank = blank - self.format = format - self.incremental = incremental - self.step = 100 / float(width) # fix - self.reset() - - def __add__(self, increment): - increment = self._get_progress(increment) - if 100 > self.progress + increment: - self.progress += increment - else: - self.progress = 100 - return self - - def __str__(self): - progressed = int(self.progress / self.step) # fix - fill = progressed * self.fill - blank = (self.width - progressed) * self.blank - return self.format % {'fill': fill, 'blank': blank, - 'progress': int(self.progress)} - - __repr__ = __str__ - - def _get_progress(self, increment): - return float(increment * 100) / self.end - - def reset(self): - """Resets the current progress to the start point""" - self.progress = self._get_progress(self.start) - return self - - def update(self, progress): - """Update the progress value instead of incrementing it""" - this_progress = self._get_progress(progress) - if this_progress < 100: - self.progress = this_progress - else: - self.progress = 100 - - def trm_indeces(a, b): """ Generate (i,j) indeces of a triangular matrix, between elements a and b. From 89503205e25245c579b5cb031cacbc7068f3c284 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sun, 8 Jan 2017 16:33:32 +0100 Subject: [PATCH 10/15] fix trm_indices spelling --- package/MDAnalysis/analysis/encore/confdistmatrix.py | 6 +++--- package/MDAnalysis/analysis/encore/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index d821627799c..e88467b7a8d 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -47,7 +47,7 @@ class to compute an RMSD matrix in such a way is also available. from ..align import rotation_matrix from .cutils import PureRMSD -from .utils import TriangularMatrix, trm_indeces +from .utils import TriangularMatrix, trm_indices try: from sklearn.externals.joblib import Parallel, delayed @@ -156,7 +156,7 @@ def conformational_distance_matrix(ensemble, # Initialize workers. Simple worker doesn't perform fitting, # fitter worker does. - indices = trm_indeces((0, 0), (framesn - 1, framesn - 1)) + indices = trm_indices((0, 0), (framesn - 1, framesn - 1)) Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(conf_dist_function)( element, rmsd_coordinates, @@ -183,7 +183,7 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, tasks : iterator of int of length 2 Given a triangular matrix, this function will calculate RMSD values from element tasks[0] to tasks[1]. Since the matrix - is triangular, the trm_indeces matrix automatically + is triangular, the trm_indices matrix automatically calculates the corrisponding i,j matrix indices. The matrix is written as an array in a row-major order (see the TriangularMatrix class for details). diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index 53453527959..1779fdc3083 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -324,11 +324,11 @@ def run(self): return tuple(sorted(results_list, key=lambda x: x[0])) -def trm_indeces(a, b): +def trm_indices(a, b): """ Generate (i,j) indeces of a triangular matrix, between elements a and b. The matrix size is automatically determined from the number of elements. - For instance: trm_indeces((0,0),(2,1)) yields (0,0) (1,0) (1,1) (2,0) + For instance: trm_indices((0,0),(2,1)) yields (0,0) (1,0) (1,1) (2,0) (2,1). Parameters From a541e15ff766a54dfed1ca4a8ef6ab6dbd96e25b Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Thu, 5 Jan 2017 14:21:35 +0000 Subject: [PATCH 11/15] TST: Added tests for analysis.encore import warnings --- .../MDAnalysisTests/analysis/test_encore.py | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index e207712845a..7aaab9f766c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -24,13 +24,17 @@ import MDAnalysis as mda import MDAnalysis.analysis.encore as encore +import importlib import tempfile import numpy as np +import sys +import warnings -from numpy.testing import (TestCase, dec, assert_equal, assert_almost_equal) +from numpy.testing import (TestCase, dec, assert_equal, assert_almost_equal, + assert_warns) from MDAnalysisTests.datafiles import DCD, DCD2, PSF -from MDAnalysisTests import parser_not_found, module_not_found +from MDAnalysisTests import parser_not_found, module_not_found, block_import import MDAnalysis.analysis.rms as rms import MDAnalysis.analysis.align as align @@ -840,3 +844,35 @@ def test_dimensionality_reduction_two_different_methods(self): method=[encore.StochasticProximityEmbeddingNative(dims[0]), encore.PrincipalComponentAnalysis(dims[1])]) assert_equal(coordinates[1].shape[0], dims[1]) + + +class TestEncoreImportWarnings(object): + def setUp(self): + # clear cache of encore module + sys.modules.pop('scipy', None) + for mod in list(sys.modules): # list as we're changing as we iterate + if '.encore' in mod: + sys.modules.pop(mod, None) + + @block_import('sklearn') + def _check_sklearn_import_warns(self, package): + warnings.simplefilter('always') + assert_warns(ImportWarning, importlib.import_module, package) + + @block_import('scipy') + def _check_scipy_import_warns(self, package): + warnings.simplefilter('always') + assert_warns(ImportWarning, importlib.import_module, package) + + def test_import_warnings(self): + for pkg in ( + 'MDAnalysis.analysis.encore.confdistmatrix', + 'MDAnalysis.analysis.encore.utils', + 'MDAnalysis.analysis.encore.dimensionality_reduction.DimensionalityReductionMethod', + 'MDAnalysis.analysis.encore.clustering.ClusteringMethod', + ): + yield self._check_sklearn_import_warns, pkg + for pkg in ( + 'MDAnalysis.analysis.encore.similarity', + ): + yield self._check_scipy_import_warns, pkg From c2656bf0c6524197d916baea8c401e0e8bac6c96 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Thu, 5 Jan 2017 15:08:57 +0000 Subject: [PATCH 12/15] Update test_encore.py --- testsuite/MDAnalysisTests/analysis/test_encore.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 7aaab9f766c..053e444effb 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -849,7 +849,6 @@ def test_dimensionality_reduction_two_different_methods(self): class TestEncoreImportWarnings(object): def setUp(self): # clear cache of encore module - sys.modules.pop('scipy', None) for mod in list(sys.modules): # list as we're changing as we iterate if '.encore' in mod: sys.modules.pop(mod, None) From 8e1e525af35dfec80259365c61e9f2f31ef5c64a Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sun, 8 Jan 2017 21:26:48 +0100 Subject: [PATCH 13/15] fix failing test suite --- testsuite/MDAnalysisTests/analysis/test_encore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 053e444effb..40e8b648abc 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -850,7 +850,7 @@ class TestEncoreImportWarnings(object): def setUp(self): # clear cache of encore module for mod in list(sys.modules): # list as we're changing as we iterate - if '.encore' in mod: + if '.encore' in mod or 'sklearn' in mod or 'scipy' in mod: sys.modules.pop(mod, None) @block_import('sklearn') From 0dcbd8d57c5fc43589a5b99272b22e54d8c7feb6 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Mon, 9 Jan 2017 16:06:57 +0000 Subject: [PATCH 14/15] TST: Fixed block_import not blocking subpackages --- testsuite/MDAnalysisTests/analysis/test_encore.py | 2 +- testsuite/MDAnalysisTests/util.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 40e8b648abc..f364cfa9772 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -850,7 +850,7 @@ class TestEncoreImportWarnings(object): def setUp(self): # clear cache of encore module for mod in list(sys.modules): # list as we're changing as we iterate - if '.encore' in mod or 'sklearn' in mod or 'scipy' in mod: + if 'encore' in mod: sys.modules.pop(mod, None) @block_import('sklearn') diff --git a/testsuite/MDAnalysisTests/util.py b/testsuite/MDAnalysisTests/util.py index 992b427107b..63f4690aece 100644 --- a/testsuite/MDAnalysisTests/util.py +++ b/testsuite/MDAnalysisTests/util.py @@ -42,10 +42,13 @@ def block_import(package): eg: - @blocker('numpy') + @block_import('numpy') def try_and_do_something(): import numpy as np # this will fail! + Will also block imports of subpackages ie block_import('numpy') should + block 'import numpy.matrix' + Shadows the builtin import method, sniffs import requests and blocks the designated package. """ @@ -55,7 +58,7 @@ def func_wrapper(*args, **kwargs): with mock.patch('{}.__import__'.format(builtins_name), wraps=importer) as mbi: def blocker(*args, **kwargs): - if package in args: + if package in args[0]: raise ImportError("Blocked by block_import") else: # returning DEFAULT allows the real function to continue From c0699584f78c1cbc422f93eb30702e2e3f01f8c0 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 13 Jan 2017 23:21:08 +0100 Subject: [PATCH 15/15] switch to use standalone joblib package We don't use the sklearn packaged version to have most of the encore distribution run normally inside of MDAnalysis --- .travis.yml | 2 +- .../analysis/encore/confdistmatrix.py | 5 +- package/MDAnalysis/analysis/encore/utils.py | 5 +- package/setup.py | 3 +- .../MDAnalysisTests/analysis/test_encore.py | 73 ++++--------------- 5 files changed, 20 insertions(+), 68 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0e2e663079b..81dc3fec526 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,7 +43,7 @@ before_install: - conda install --yes pylint install: # Minimal installation! - - conda create --yes -q -n pyenv python=$PYTHON_VERSION numpy mmtf-python nose=1.3.7 mock sphinx=1.3 six biopython networkx cython + - conda create --yes -q -n pyenv python=$PYTHON_VERSION numpy mmtf-python nose=1.3.7 mock sphinx=1.3 six biopython networkx cython joblib - source activate pyenv # Install griddataformats from PIP so that scipy is only installed in the full build (#1147) - pip install griddataformats diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index e88467b7a8d..cb53a51829e 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -50,11 +50,10 @@ class to compute an RMSD matrix in such a way is also available. from .utils import TriangularMatrix, trm_indices try: - from sklearn.externals.joblib import Parallel, delayed + from joblib import Parallel, delayed except ImportError: - sklearn = None import warnings - warnings.warn( "Couldn't import sklearn. Can't use conformational_distance_matrix", category=ImportWarning) + warnings.warn( "Couldn't import joblib. Can't use conformational_distance_matrix", category=ImportWarning) def conformational_distance_matrix(ensemble, diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index 1779fdc3083..7b9e72e4bce 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -28,11 +28,10 @@ from ...coordinates.memory import MemoryReader try: - from sklearn.externals.joblib import cpu_count + from joblib import cpu_count except ImportError: - sklearn = None import warnings - warnings.warn("Couldn't import sklearn. Can't use ParallelCalculation", category=ImportWarning) + warnings.warn("Couldn't import joblib. Can't use ParallelCalculation", category=ImportWarning) class TriangularMatrix(object): """Triangular matrix class. This class is designed to provide a diff --git a/package/setup.py b/package/setup.py index 7eb8a43bded..4bae9214790 100755 --- a/package/setup.py +++ b/package/setup.py @@ -498,7 +498,7 @@ def dynamic_author_list(): classifiers=CLASSIFIERS, cmdclass=cmdclass, requires=['numpy (>=1.5.0)', 'biopython', 'mmtf (>=1.0.0)', - 'networkx (>=1.0)', 'GridDataFormats (>=0.3.2)'], + 'networkx (>=1.0)', 'GridDataFormats (>=0.3.2)', 'joblib'], # all standard requirements are available through PyPi and # typically can be installed without difficulties through setuptools setup_requires=[ @@ -511,6 +511,7 @@ def dynamic_author_list(): 'GridDataFormats>=0.3.2', 'six>=1.4.0', 'mmtf-python>=1.0.0', + 'joblib', ], # extras can be difficult to install through setuptools and/or # you might prefer to use the version available through your diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index f364cfa9772..b1ca93423bd 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -44,8 +44,6 @@ class TestEncore(TestCase): @dec.skipif(parser_not_found('DCD'), 'DCD parser not available. Are you using python 3?') - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def setUp(self): # Create universe from templates defined in setUpClass self.ens1 = mda.Universe( @@ -155,15 +153,14 @@ def function(x): assert_equal(r[1], arguments[i][0]**2, err_msg="Unexpeted results from ParallelCalculation") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_rmsd_matrix_with_superimposition(self): - conf_dist_matrix = encore.confdistmatrix.conformational_distance_matrix(self.ens1, - encore.confdistmatrix.set_rmsd_matrix_elements, - selection = "name CA", - pairwise_align = True, - mass_weighted = True, - n_jobs = 1) + conf_dist_matrix = encore.confdistmatrix.conformational_distance_matrix( + self.ens1, + encore.confdistmatrix.set_rmsd_matrix_elements, + selection="name CA", + pairwise_align=True, + mass_weighted=True, + n_jobs=1) reference = rms.RMSD(self.ens1, select = "name CA") reference.run() @@ -172,8 +169,6 @@ def test_rmsd_matrix_with_superimposition(self): assert_almost_equal(conf_dist_matrix[0,i], rmsd[2], decimal=3, err_msg = "calculated RMSD values differ from the reference implementation") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_rmsd_matrix_without_superimposition(self): selection_string = "name CA" selection = self.ens1.select_atoms(selection_string) @@ -183,12 +178,12 @@ def test_rmsd_matrix_without_superimposition(self): reference_rmsd.append(rms.rmsd(coordinates[0], coord, superposition=False)) confdist_matrix = encore.confdistmatrix.conformational_distance_matrix( - self.ens1, - encore.confdistmatrix.set_rmsd_matrix_elements, - selection = selection_string, - pairwise_align = False, - mass_weighted = True, - n_jobs = 1) + self.ens1, + encore.confdistmatrix.set_rmsd_matrix_elements, + selection=selection_string, + pairwise_align=False, + mass_weighted=True, + n_jobs=1) print (repr(confdist_matrix.as_array()[0,:])) assert_almost_equal(confdist_matrix.as_array()[0,:], reference_rmsd, decimal=3, @@ -259,8 +254,6 @@ def test_hes_align(self): assert_almost_equal(result_value, expected_value, decimal=-3, err_msg="Unexpected value for Harmonic Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_ces_to_self(self): results, details = \ encore.ces([self.ens1, self.ens1], @@ -270,8 +263,6 @@ def test_ces_to_self(self): assert_almost_equal(result_value, expected_value, err_msg="ClusteringEnsemble Similarity to itself not zero: {0:f}".format(result_value)) - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_ces(self): results, details = encore.ces([self.ens1, self.ens2]) result_value = results[0,1] @@ -281,8 +272,6 @@ def test_ces(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dres_to_self(self): results, details = encore.dres([self.ens1, self.ens1]) result_value = results[0,1] @@ -292,8 +281,6 @@ def test_dres_to_self(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dres(self): results, details = encore.dres([self.ens1, self.ens2], selection="name CA and resnum 1-10") result_value = results[0,1] @@ -303,8 +290,6 @@ def test_dres(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dres_without_superimposition(self): distance_matrix = encore.get_distance_matrix( encore.merge_universes([self.ens1, self.ens2]), @@ -316,8 +301,6 @@ def test_dres_without_superimposition(self): assert_almost_equal(result_value, expected_value, decimal=1, err_msg="Unexpected value for Dim. reduction Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_ces_convergence(self): expected_values = [0.3443593, 0.1941854, 0.06857104, 0.] results = encore.ces_convergence(self.ens1, 5) @@ -328,8 +311,6 @@ def test_ces_convergence(self): @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dres_convergence(self): expected_values = [ 0.3, 0.] results = encore.dres_convergence(self.ens1, 10) @@ -350,8 +331,6 @@ def test_hes_error_estimation(self): err_msg="Unexpected standard daviation for bootstrapped samples in Harmonic Ensemble imilarity") @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_ces_error_estimation(self): expected_average = 0.03 expected_stdev = 0.31 @@ -393,8 +372,6 @@ def test_ces_error_estimation_ensemble_bootstrap(self): @dec.slow @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dres_error_estimation(self): average_upper_bound = 0.3 stdev_upper_bound = 0.2 @@ -459,8 +436,6 @@ def tearDownClass(cls): del cls.ens2_template @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_clustering_one_ensemble(self): cluster_collection = encore.cluster(self.ens1) expected_value = 7 @@ -468,8 +443,6 @@ def test_clustering_one_ensemble(self): err_msg="Unexpected results: {0}".format(cluster_collection)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_clustering_two_ensembles(self): cluster_collection = encore.cluster([self.ens1, self.ens2]) expected_value = 14 @@ -477,8 +450,6 @@ def test_clustering_two_ensembles(self): err_msg="Unexpected results: {0}".format(cluster_collection)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_clustering_three_ensembles_two_identical(self): cluster_collection = encore.cluster([self.ens1, self.ens2, self.ens1]) expected_value = 40 @@ -486,8 +457,6 @@ def test_clustering_three_ensembles_two_identical(self): err_msg="Unexpected result: {0}".format(cluster_collection)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_clustering_two_methods(self): cluster_collection = encore.cluster( [self.ens1], @@ -497,8 +466,6 @@ def test_clustering_two_methods(self): err_msg="Unexpected result: {0}".format(cluster_collection)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_clustering_AffinityPropagationNative_direct(self): method = encore.AffinityPropagationNative() distance_matrix = encore.get_distance_matrix(self.ens1) @@ -742,8 +709,6 @@ def tearDownClass(cls): del cls.ens2_template @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dimensionality_reduction_one_ensemble(self): dimension = 2 coordinates, details = encore.reduce_dimensionality(self.ens1) @@ -752,8 +717,6 @@ def test_dimensionality_reduction_one_ensemble(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dimensionality_reduction_two_ensembles(self): dimension = 2 coordinates, details = \ @@ -762,8 +725,6 @@ def test_dimensionality_reduction_two_ensembles(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dimensionality_reduction_three_ensembles_two_identical(self): coordinates, details = \ encore.reduce_dimensionality([self.ens1, self.ens2, self.ens1]) @@ -773,8 +734,6 @@ def test_dimensionality_reduction_three_ensembles_two_identical(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dimensionality_reduction_specified_dimension(self): dimension = 3 coordinates, details = encore.reduce_dimensionality( @@ -784,8 +743,6 @@ def test_dimensionality_reduction_specified_dimension(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dimensionality_reduction_SPENative_direct(self): dimension = 2 method = encore.StochasticProximityEmbeddingNative(dimension=dimension) @@ -822,8 +779,6 @@ def test_dimensionality_reduction_different_method(self): err_msg="Unexpected result in dimensionality reduction: {0}".format(coordinates)) @dec.slow - @dec.skipif(module_not_found('sklearn'), - "Test skipped because sklearn is not available.") def test_dimensionality_reduction_two_methods(self): dims = [2,3] coordinates, details = \ @@ -865,8 +820,6 @@ def _check_scipy_import_warns(self, package): def test_import_warnings(self): for pkg in ( - 'MDAnalysis.analysis.encore.confdistmatrix', - 'MDAnalysis.analysis.encore.utils', 'MDAnalysis.analysis.encore.dimensionality_reduction.DimensionalityReductionMethod', 'MDAnalysis.analysis.encore.clustering.ClusteringMethod', ):