From 8dfe0103d02ab48856c61b76e4ff8e9480c1462b Mon Sep 17 00:00:00 2001 From: Manuel Nuno Melo Date: Mon, 19 Jun 2017 01:44:14 +0200 Subject: [PATCH] Initial adaptation of peak.util.imports --- package/MDAnalysis/analysis/density.py | 26 +- package/MDAnalysis/analysis/distances.py | 4 +- .../encore/clustering/ClusteringMethod.py | 529 +++++++++--------- .../DimensionalityReductionMethod.py | 84 ++- .../MDAnalysis/analysis/encore/similarity.py | 7 +- .../analysis/hbonds/hbond_autocorrel.py | 4 + package/MDAnalysis/analysis/hole.py | 7 +- package/MDAnalysis/analysis/legacy/x3dna.py | 5 +- package/MDAnalysis/analysis/pca.py | 6 +- package/MDAnalysis/analysis/polymer.py | 12 +- package/MDAnalysis/analysis/psa.py | 138 +++-- package/MDAnalysis/lib/lazy.py | 271 +++++++++ .../MDAnalysis/visualization/streamlines.py | 16 +- .../visualization/streamlines_3D.py | 7 +- .../analysis/test_distances.py | 1 - .../MDAnalysisTests/analysis/test_encore.py | 24 +- 16 files changed, 678 insertions(+), 463 deletions(-) create mode 100644 package/MDAnalysis/lib/lazy.py diff --git a/package/MDAnalysis/analysis/density.py b/package/MDAnalysis/analysis/density.py index 26d45af3fd5..b77975f9820 100644 --- a/package/MDAnalysis/analysis/density.py +++ b/package/MDAnalysis/analysis/density.py @@ -118,31 +118,7 @@ import os.path import errno import warnings - -try: - from gridData import Grid -except ImportError: - raise ImportError( - """ImportError: The GridDataFormats package can not be found! - - The 'gridData' module from GridDataFormats could not be - imported. Please install it first. You can try installing - directly from the internet: - - pip install GridDataFormats - - or - - conda config --add channels conda-forge - conda install griddataformats - - Alternatively, download the package from - - http://pypi.python.org/pypi/GridDataFormats/ - - and install in the usual manner. - """ - ) +from gridData import Grid import MDAnalysis from MDAnalysis.core import groups diff --git a/package/MDAnalysis/analysis/distances.py b/package/MDAnalysis/analysis/distances.py index fc3dc4432a6..a240e7406ad 100644 --- a/package/MDAnalysis/analysis/distances.py +++ b/package/MDAnalysis/analysis/distances.py @@ -42,7 +42,6 @@ 'contact_matrix', 'dist', 'between'] import numpy as np -import scipy.sparse from MDAnalysis.lib.distances import distance_array, self_distance_array from MDAnalysis.lib.c_distances import contact_matrix_no_pbc, contact_matrix_pbc @@ -52,6 +51,9 @@ import logging logger = logging.getLogger("MDAnalysis.analysis.distances") +# Optional and/or lazily imported modules +from MDAnalysis.lib import lazy +scipy = lazy.import_module('scipy.sparse', level='base') def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): '''Calculates a matrix of contacts. diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py index a293b755e3e..b2ed24c9d3b 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py @@ -41,16 +41,10 @@ # Import native affinity propagation implementation from . 
import affinityprop -# Attempt to import scikit-learn clustering algorithms -try: - import sklearn.cluster -except ImportError: - sklearn = None - msg = "sklearn.cluster could not be imported: some functionality will " \ - "not be available in encore.fit_clusters()" - warnings.warn(msg, category=ImportWarning) - logging.warn(msg) - del msg +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +# scikit-learn clustering algorithms +sklearn = lazy.import_module('sklearn.cluster', level='base') def encode_centroid_info(clusters, cluster_centers_indices): @@ -158,270 +152,269 @@ def __call__(self, distance_matrix): details = {} return clusters, details -if sklearn: - class AffinityPropagation(ClusteringMethod): +class AffinityPropagation(ClusteringMethod): + """ + Interface to the Affinity propagation clustering procedure implemented + in sklearn. + """ + + def __init__(self, + damping=0.9, preference=-1.0, + max_iter=500, convergence_iter=50, + **kwargs): + """ + Parameters + ---------- + + damping : float, optional + Damping factor (default is 0.9). Parameter for the Affinity + Propagation for clustering. + + preference : float, optional + Preference parameter used in the Affinity Propagation algorithm + for clustering (default -1.0). A high preference value results + in many clusters, a low preference will result in fewer numbers + of clusters. + + max_iter : int, optional + Maximum number of iterations for affinity propagation (default + is 500). + + convergence_iter : int, optional + Minimum number of unchanging iterations to achieve convergence + (default is 50). Parameter in the Affinity Propagation for + clustering. + + """ + self.ap = \ + sklearn.cluster.AffinityPropagation( + damping=damping, + preference=preference, + max_iter=max_iter, + convergence_iter=convergence_iter, + affinity="precomputed", + **kwargs) + + def __call__(self, distance_matrix): + """ + Parameters + ---------- + + distance_matrix : encore.utils.TriangularMatrix + conformational distance matrix + + Returns + ------- + numpy.array + list of cluster indices + + """ + logging.info("Starting Affinity Propagation: {0}".format + (self.ap.get_params())) + + # Convert from distance matrix to similarity matrix + similarity_matrix = distance_matrix.as_array() * -1 + clusters = self.ap.fit_predict(similarity_matrix) + clusters = encode_centroid_info(clusters, + self.ap.cluster_centers_indices_) + details = {} + return clusters, details + + +class DBSCAN(ClusteringMethod): + """ + Interface to the DBSCAN clustering procedure implemented in sklearn. + """ + def __init__(self, + eps=0.5, + min_samples=5, + algorithm="auto", + leaf_size=30, + **kwargs): """ - Interface to the Affinity propagation clustering procedure implemented - in sklearn. + Parameters + ---------- + + eps : float, optional (default = 0.5) + The maximum distance between two samples for them to be + considered as in the same neighborhood. + + min_samples : int, optional (default = 5) + The number of samples (or total weight) in a neighborhood for + a point to be considered as a core point. This includes the + point itself. + + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + The algorithm to be used by the NearestNeighbors module + to compute pointwise distances and find nearest neighbors. + See NearestNeighbors module documentation for details. + + leaf_size : int, optional (default = 30) + Leaf size passed to BallTree or cKDTree. 
This can affect the + speed of the construction and query, as well as the memory + required to store the tree. The optimal value depends + on the nature of the problem. + + sample_weight : array, shape (n_samples,), optional + Weight of each sample, such that a sample with a weight of at + least ``min_samples`` is by itself a core sample; a sample with + negative weight may inhibit its eps-neighbor from being core. + Note that weights are absolute, and default to 1. + + """ + + self.dbscan = sklearn.cluster.DBSCAN(eps=eps, + min_samples = min_samples, + algorithm=algorithm, + leaf_size = leaf_size, + metric="precomputed", + **kwargs) + + def __call__(self, distance_matrix): """ + Parameters + ---------- + + distance_matrix : encore.utils.TriangularMatrix + conformational distance matrix + + + Returns + ------- + numpy.array + list of cluster indices - def __init__(self, - damping=0.9, preference=-1.0, - max_iter=500, convergence_iter=50, - **kwargs): - """ - Parameters - ---------- - - damping : float, optional - Damping factor (default is 0.9). Parameter for the Affinity - Propagation for clustering. - - preference : float, optional - Preference parameter used in the Affinity Propagation algorithm - for clustering (default -1.0). A high preference value results - in many clusters, a low preference will result in fewer numbers - of clusters. - - max_iter : int, optional - Maximum number of iterations for affinity propagation (default - is 500). - - convergence_iter : int, optional - Minimum number of unchanging iterations to achieve convergence - (default is 50). Parameter in the Affinity Propagation for - clustering. - - """ - self.ap = \ - sklearn.cluster.AffinityPropagation( - damping=damping, - preference=preference, - max_iter=max_iter, - convergence_iter=convergence_iter, - affinity="precomputed", - **kwargs) - - def __call__(self, distance_matrix): - """ - Parameters - ---------- - - distance_matrix : encore.utils.TriangularMatrix - conformational distance matrix - - Returns - ------- - numpy.array - list of cluster indices - - """ - logging.info("Starting Affinity Propagation: {0}".format - (self.ap.get_params())) - - # Convert from distance matrix to similarity matrix - similarity_matrix = distance_matrix.as_array() * -1 - clusters = self.ap.fit_predict(similarity_matrix) - clusters = encode_centroid_info(clusters, - self.ap.cluster_centers_indices_) - details = {} - return clusters, details - - - class DBSCAN(ClusteringMethod): """ - Interface to the DBSCAN clustering procedure implemented in sklearn. + logging.info("Starting DBSCAN: {0}".format( + self.dbscan.get_params())) + clusters = self.dbscan.fit_predict(distance_matrix.as_array()) + if np.min(clusters == -1): + clusters += 1 + # No centroid information is provided by DBSCAN, so we just + # pick random members + cluster_representatives = np.unique(clusters, return_index=True)[1] + clusters = encode_centroid_info(clusters, + cluster_representatives) + details = {} + return clusters, details + +class KMeans(ClusteringMethod): + + # Whether the method accepts a distance matrix + accepts_distance_matrix = False + + """ + Interface to the KMeans clustering procedure implemented in sklearn. 
+ """ + def __init__(self, + n_clusters, + max_iter = 300, + n_init = 10, + init = 'k-means++', + algorithm="auto", + tol = 1e-4, + verbose=False, + random_state=None, + copy_x=True, + n_jobs=1, + **kwargs): """ - def __init__(self, - eps=0.5, - min_samples=5, - algorithm="auto", - leaf_size=30, - **kwargs): - """ - Parameters - ---------- - - eps : float, optional (default = 0.5) - The maximum distance between two samples for them to be - considered as in the same neighborhood. - - min_samples : int, optional (default = 5) - The number of samples (or total weight) in a neighborhood for - a point to be considered as a core point. This includes the - point itself. - - algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional - The algorithm to be used by the NearestNeighbors module - to compute pointwise distances and find nearest neighbors. - See NearestNeighbors module documentation for details. - - leaf_size : int, optional (default = 30) - Leaf size passed to BallTree or cKDTree. This can affect the - speed of the construction and query, as well as the memory - required to store the tree. The optimal value depends - on the nature of the problem. - - sample_weight : array, shape (n_samples,), optional - Weight of each sample, such that a sample with a weight of at - least ``min_samples`` is by itself a core sample; a sample with - negative weight may inhibit its eps-neighbor from being core. - Note that weights are absolute, and default to 1. - - """ - - self.dbscan = sklearn.cluster.DBSCAN(eps=eps, - min_samples = min_samples, - algorithm=algorithm, - leaf_size = leaf_size, - metric="precomputed", - **kwargs) - - def __call__(self, distance_matrix): - """ - Parameters - ---------- - - distance_matrix : encore.utils.TriangularMatrix - conformational distance matrix - - - Returns - ------- - numpy.array - list of cluster indices - - """ - logging.info("Starting DBSCAN: {0}".format( - self.dbscan.get_params())) - clusters = self.dbscan.fit_predict(distance_matrix.as_array()) - if np.min(clusters == -1): - clusters += 1 - # No centroid information is provided by DBSCAN, so we just - # pick random members - cluster_representatives = np.unique(clusters, return_index=True)[1] - clusters = encode_centroid_info(clusters, - cluster_representatives) - details = {} - return clusters, details - - class KMeans(ClusteringMethod): - - # Whether the method accepts a distance matrix - accepts_distance_matrix = False + Parameters + ---------- + n_clusters : int + The number of clusters to form as well as the number of + centroids to generate. + + max_iter : int, optional (default 300) + Maximum number of iterations of the k-means algorithm to run. + + n_init : int, optional (default 10) + Number of time the k-means algorithm will be run with different + centroid seeds. The final results will be the best output of + n_init consecutive runs in terms of inertia. + + init : {'k-means++', 'random', or ndarray, or a callable}, optional + Method for initialization, default to 'k-means++': + 'k-means++' : selects initial cluster centers for k-mean + clustering in a smart way to speed up convergence. See section + Notes in k_init for more details. + 'random': generate k centroids from a Gaussian with mean and + variance estimated from the data. + If an ndarray is passed, it should be of shape + (n_clusters, n_features) and gives the initial centers. + If a callable is passed, it should take arguments X, k and + and a ranndom state and return an initialization. 
+ + precompute_distances : {'auto', True, False} + Precompute distances (faster but takes more memory). + 'auto' : do not precompute distances if + n_samples * n_clusters > 12 million. This corresponds to about + 100MB overhead per job using double precision. + True : always precompute distances + False : never precompute distances + + tol : float, optional (default 1e-4) + The relative increment in the results before declaring + convergence. + + verbose : boolean, optional (default False) + Verbosity mode. + + random_state : integer or numpy.RandomState, optional + The generator used to initialize the centers. If an integer is + given, it fixes the seed. Defaults to the global numpy random + number generator. + + copy_x : boolean, optional + When pre-computing distances it is more numerically accurate to + center the data first. If copy_x is True, then the original + data is not modified. If False, the original data is modified, + and put back before the function returns, but small numerical + differences may be introduced by subtracting and then adding + the data mean. + + n_jobs : int + The number of jobs to use for the computation. This works by + computing each of the n_init runs in parallel. If -1 all CPUs + are used. If 1 is given, no parallel computing code is used at + all, which is useful for debugging. For n_jobs below -1, + (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs + but one are used. """ - Interface to the KMeans clustering procedure implemented in sklearn. + self.kmeans = sklearn.cluster.KMeans(n_clusters = n_clusters, + max_iter = max_iter, + n_init = n_init, + init = init, + precompute_distances='auto', + tol = tol, + verbose=verbose, + random_state=random_state, + copy_x=copy_x, + n_jobs=n_jobs, + **kwargs) + + def __call__(self, coordinates): """ - def __init__(self, - n_clusters, - max_iter = 300, - n_init = 10, - init = 'k-means++', - algorithm="auto", - tol = 1e-4, - verbose=False, - random_state=None, - copy_x=True, - n_jobs=1, - **kwargs): - """ - Parameters - ---------- - n_clusters : int - The number of clusters to form as well as the number of - centroids to generate. - - max_iter : int, optional (default 300) - Maximum number of iterations of the k-means algorithm to run. - - n_init : int, optional (default 10) - Number of time the k-means algorithm will be run with different - centroid seeds. The final results will be the best output of - n_init consecutive runs in terms of inertia. - - init : {'k-means++', 'random', or ndarray, or a callable}, optional - Method for initialization, default to 'k-means++': - 'k-means++' : selects initial cluster centers for k-mean - clustering in a smart way to speed up convergence. See section - Notes in k_init for more details. - 'random': generate k centroids from a Gaussian with mean and - variance estimated from the data. - If an ndarray is passed, it should be of shape - (n_clusters, n_features) and gives the initial centers. - If a callable is passed, it should take arguments X, k and - and a ranndom state and return an initialization. - - precompute_distances : {'auto', True, False} - Precompute distances (faster but takes more memory). - 'auto' : do not precompute distances if - n_samples * n_clusters > 12 million. This corresponds to about - 100MB overhead per job using double precision. - True : always precompute distances - False : never precompute distances - - tol : float, optional (default 1e-4) - The relative increment in the results before declaring - convergence. 
- - verbose : boolean, optional (default False) - Verbosity mode. - - random_state : integer or numpy.RandomState, optional - The generator used to initialize the centers. If an integer is - given, it fixes the seed. Defaults to the global numpy random - number generator. - - copy_x : boolean, optional - When pre-computing distances it is more numerically accurate to - center the data first. If copy_x is True, then the original - data is not modified. If False, the original data is modified, - and put back before the function returns, but small numerical - differences may be introduced by subtracting and then adding - the data mean. - - n_jobs : int - The number of jobs to use for the computation. This works by - computing each of the n_init runs in parallel. If -1 all CPUs - are used. If 1 is given, no parallel computing code is used at - all, which is useful for debugging. For n_jobs below -1, - (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs - but one are used. - - """ - self.kmeans = sklearn.cluster.KMeans(n_clusters = n_clusters, - max_iter = max_iter, - n_init = n_init, - init = init, - precompute_distances='auto', - tol = tol, - verbose=verbose, - random_state=random_state, - copy_x=copy_x, - n_jobs=n_jobs, - **kwargs) - - def __call__(self, coordinates): - """ - Parameters - ---------- - - coordinates : np.array - trajectory atom coordinates - - - Returns - ------- - numpy.array - list of cluster indices - """ - logging.info("Starting Kmeans: {0}".format( - (self.kmeans.get_params()))) - clusters = self.kmeans.fit_predict(coordinates) - distances = self.kmeans.transform(coordinates) - cluster_center_indices = np.argmin(distances, axis=0) - clusters = encode_centroid_info(clusters, - cluster_center_indices) - details = {} - return clusters, details + Parameters + ---------- + + coordinates : np.array + trajectory atom coordinates + + + Returns + ------- + numpy.array + list of cluster indices + """ + logging.info("Starting Kmeans: {0}".format( + (self.kmeans.get_params()))) + clusters = self.kmeans.fit_predict(coordinates) + distances = self.kmeans.transform(coordinates) + cluster_center_indices = np.argmin(distances, axis=0) + clusters = encode_centroid_info(clusters, + cluster_center_indices) + details = {} + return clusters, details diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py index dfb6226d2e8..f4f015e7f97 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py @@ -40,15 +40,10 @@ # Import native affinity propagation implementation from . 
import stochasticproxembed -# Attempt to import scikit-learn clustering algorithms -try: - import sklearn.decomposition -except ImportError: - sklearn = None - import warnings - warnings.warn("sklearn.decomposition could not be imported: some " - "functionality will not be available in " - "encore.dimensionality_reduction()", category=ImportWarning) +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +# scikit-learn clustering algorithms +sklearn = lazy.import_module('sklearn.decomposition', level='base') class DimensionalityReductionMethod (object): @@ -150,45 +145,42 @@ def __call__(self, distance_matrix): return coordinates, {"final_stress": final_stress} +class PrincipalComponentAnalysis(DimensionalityReductionMethod): + """ + Interface to the PCA dimensionality reduction method implemented in + sklearn. + """ -if sklearn: + # Whether the method accepts a distance matrix + accepts_distance_matrix = False - class PrincipalComponentAnalysis(DimensionalityReductionMethod): + def __init__(self, + dimension = 2, + **kwargs): + """ + Parameters + ---------- + + dimension : int + Number of dimensions to which the conformational space will be + reduced to (default is 3). """ - Interface to the PCA dimensionality reduction method implemented in - sklearn. + self.pca = sklearn.decomposition.PCA(n_components=dimension, + **kwargs) + + def __call__(self, coordinates): """ + Parameters + ---------- + + coordinates : np.array + trajectory atom coordinates + - # Whether the method accepts a distance matrix - accepts_distance_matrix = False - - def __init__(self, - dimension = 2, - **kwargs): - """ - Parameters - ---------- - - dimension : int - Number of dimensions to which the conformational space will be - reduced to (default is 3). - """ - self.pca = sklearn.decomposition.PCA(n_components=dimension, - **kwargs) - - def __call__(self, coordinates): - """ - Parameters - ---------- - - coordinates : np.array - trajectory atom coordinates - - - Returns - ------- - numpy.array - coordinates in reduced space - """ - coordinates = self.pca.fit_transform(coordinates) - return coordinates.T, {} + Returns + ------- + numpy.array + coordinates in reduced space + """ + coordinates = self.pca.fit_transform(coordinates) + return coordinates.T, {} diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index 53ec497f5ce..dce5f3e63f5 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -19,7 +19,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
-"""=================================================================================
+r"""
 Ensemble Similarity Calculations --- :mod:`MDAnalysis.analysis.encore.similarity`
 =================================================================================
@@ -176,7 +176,6 @@
 import logging
 
 import numpy as np
-import scipy.stats
 
 import MDAnalysis as mda
@@ -195,6 +194,10 @@
 from .utils import merge_universes
 from .utils import trm_indices_diag, trm_indices_nodiag
 
+# Optional and/or lazily imported modules
+from MDAnalysis.lib import lazy
+scipy = lazy.import_module('scipy.stats', level='base')
+
 # Low boundary value for log() argument - ensure no nans
 EPSILON = 1E-15
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
index 6a5bd82f9ab..e21a457fdac 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
@@ -162,6 +162,10 @@
 from MDAnalysis.lib.log import ProgressMeter
 from MDAnalysis.lib.distances import distance_array, calc_angles, calc_bonds
 
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+leastsq = lazy.import_function('scipy.optimize.leastsq')
+
 
 class HydrogenBondAutoCorrel(object):
     """Perform a time autocorrelation of the hydrogen bonds in the system.
diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py
index c2fd3cc4ead..c6210e5a858 100644
--- a/package/MDAnalysis/analysis/hole.py
+++ b/package/MDAnalysis/analysis/hole.py
@@ -258,14 +258,17 @@
 from itertools import cycle
 
 import numpy as np
-import matplotlib
-import matplotlib.pyplot as plt
 
 from MDAnalysis import Universe
 from MDAnalysis.exceptions import ApplicationError
 from MDAnalysis.lib.util import which, realpath, asiterable
 from MDAnalysis.lib.util import FORTRANReader
 
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+# This makes 'cm' available as an attr of 'matplotlib'
+matplotlib = lazy.import_module('matplotlib.cm', level='base')
+plt = lazy.import_module('matplotlib.pyplot')
 
 logger = logging.getLogger("MDAnalysis.analysis.hole")
diff --git a/package/MDAnalysis/analysis/legacy/x3dna.py b/package/MDAnalysis/analysis/legacy/x3dna.py
index 633ad9def1b..79062dfe276 100644
--- a/package/MDAnalysis/analysis/legacy/x3dna.py
+++ b/package/MDAnalysis/analysis/legacy/x3dna.py
@@ -139,11 +139,14 @@
 from collections import OrderedDict
 
 import numpy as np
-import matplotlib.pyplot as plt
 
 from MDAnalysis import ApplicationError
 from MDAnalysis.lib.util import which, realpath, asiterable
 
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+plt = lazy.import_module('matplotlib.pyplot')
+
 import logging
 logger = logging.getLogger("MDAnalysis.analysis.x3dna")
diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py
index 7e828a166bd..eb748c26ae3 100644
--- a/package/MDAnalysis/analysis/pca.py
+++ b/package/MDAnalysis/analysis/pca.py
@@ -106,14 +106,16 @@
 import warnings
 
 import numpy as np
-import scipy.integrate
 
 from MDAnalysis import Universe
 from MDAnalysis.analysis.align import _fit_to
 from MDAnalysis.lib.log import ProgressMeter
-from .base import AnalysisBase
+from MDAnalysis.analysis.base import AnalysisBase
 
+# Optional and/or lazily imported modules
+from MDAnalysis.lib import lazy
+scipy = lazy.import_module('scipy.integrate', level='base')
 
 class PCA(AnalysisBase):
     """Principal component analysis on an MD trajectory.
diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py
index 355a063eaee..ec0fe4ffcb9 100644
--- a/package/MDAnalysis/analysis/polymer.py
+++ b/package/MDAnalysis/analysis/polymer.py
@@ -40,9 +40,14 @@
 
 import logging
 
-from .. import NoDataError
-from ..lib.distances import calc_bonds
-from .base import AnalysisBase
+from MDAnalysis import NoDataError
+from MDAnalysis.lib.distances import calc_bonds
+from MDAnalysis.analysis.base import AnalysisBase
+
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+curve_fit = lazy.import_function('scipy.optimize.curve_fit')
+plt = lazy.import_module('matplotlib.pyplot')
 
 logger = logging.getLogger(__name__)
@@ -138,7 +143,6 @@ def perform_fit(self):
 
     def plot(self, ax=None):
         """Oooh fancy"""
-        import matplotlib.pyplot as plt
         if ax is None:
             ax = plt.gca()
         ax.plot(self.x, self.results, 'ro', label='Result')
diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py
index 62d302211dd..a8c7cd03a9d 100644
--- a/package/MDAnalysis/analysis/psa.py
+++ b/package/MDAnalysis/analysis/psa.py
@@ -216,8 +216,6 @@
 from six.moves import range, cPickle
 
 import numpy as np
-from scipy import spatial, cluster
-import matplotlib
 
 import warnings
 import numbers
@@ -226,6 +224,16 @@
 import MDAnalysis.analysis.align
 from MDAnalysis import NoDataError
 
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+spatial = lazy.import_module('scipy.spatial')
+cluster = lazy.import_module('scipy.cluster')
+
+matplotlib = lazy.import_module('matplotlib')
+plt = lazy.import_module('matplotlib.pyplot')
+
+sns = lazy.import_module('seaborn.apionly')
+
 import os
 import logging
@@ -1689,7 +1699,6 @@ def plot(self, filename=None, linkage='ward', count_sort=False,
         clustered distance matrix (reordered)
 
         """
-        from matplotlib.pyplot import figure, colorbar, cm, savefig, clf
         if self.D is None:
             err_str = "No distance data; do 'PSAnalysis.run(store=True)' first."
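The hunks above are the pattern this patch applies throughout: eager top-level imports are swapped for lazy placeholders, so `import MDAnalysis.analysis.psa` succeeds even when matplotlib, scipy or seaborn are missing, and the dependency is only demanded when a plotting method actually runs. A minimal sketch of the resulting behavior, using the same names as above::

    from MDAnalysis.lib import lazy

    plt = lazy.import_module('matplotlib.pyplot')  # nothing imported yet

    def plot_results(x, y):
        # matplotlib.pyplot is really imported at this first attribute
        # access; if it is missing, an ImportError naming the calling
        # module is raised here instead of at import time.
        ax = plt.gca()
        return ax.plot(x, y)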
@@ -1699,14 +1708,14 @@ def plot(self, filename=None, linkage='ward', count_sort=False,
         dgram_loc, hmap_loc, cbar_loc = self._get_plot_obj_locs()
         aspect_ratio = 1.25
 
-        clf()
-        fig = figure(figsize=(figsize*aspect_ratio, figsize))
+        plt.clf()
+        fig = plt.figure(figsize=(figsize*aspect_ratio, figsize))
         ax_hmap = fig.add_axes(hmap_loc)
         ax_dgram = fig.add_axes(dgram_loc)
 
-        Z, dgram = self.cluster(dist_matrix, \
-                                method=linkage, \
-                                count_sort=count_sort, \
+        Z, dgram = self.cluster(dist_matrix,
+                                method=linkage,
+                                count_sort=count_sort,
                                 distance_sort=distance_sort)
         rowidx = colidx = dgram['leaves'] # get row-wise ordering from clustering
         ax_dgram.invert_yaxis() # Place origin at up left (from low left)
@@ -1714,26 +1723,44 @@
         minDist, maxDist = 0, np.max(dist_matrix)
         dist_matrix_clus = dist_matrix[rowidx,:]
         dist_matrix_clus = dist_matrix_clus[:,colidx]
-        im = ax_hmap.matshow(dist_matrix_clus, aspect='auto', origin='lower', \
-                             cmap=cm.YlGn, vmin=minDist, vmax=maxDist)
+        im = ax_hmap.matshow(dist_matrix_clus,
+                             aspect='auto',
+                             origin='lower',
+                             cmap=plt.cm.YlGn,
+                             vmin=minDist,
+                             vmax=maxDist)
         ax_hmap.invert_yaxis() # Place origin at upper left (from lower left)
         ax_hmap.locator_params(nbins=npaths)
         ax_hmap.set_xticks(np.arange(npaths), minor=True)
         ax_hmap.set_yticks(np.arange(npaths), minor=True)
-        ax_hmap.tick_params(axis='x', which='both', labelleft='off', \
-                            labelright='off', labeltop='on', labelsize=0)
-        ax_hmap.tick_params(axis='y', which='both', labelleft='on', \
-                            labelright='off', labeltop='off', labelsize=0)
+        ax_hmap.tick_params(axis='x',
+                            which='both',
+                            labelleft='off',
+                            labelright='off',
+                            labeltop='on',
+                            labelsize=0)
+        ax_hmap.tick_params(axis='y',
+                            which='both',
+                            labelleft='on',
+                            labelright='off',
+                            labeltop='off',
+                            labelsize=0)
         rowlabels = [self.labels[i] for i in rowidx]
         collabels = [self.labels[i] for i in colidx]
-        ax_hmap.set_xticklabels(collabels, rotation='vertical', \
-                                size=(labelsize-4), multialignment='center', minor=True)
-        ax_hmap.set_yticklabels(rowlabels, rotation='horizontal', \
-                                size=(labelsize-4), multialignment='left', ha='right', \
-                                minor=True)
+        ax_hmap.set_xticklabels(collabels,
+                                rotation='vertical',
+                                size=(labelsize-4),
+                                multialignment='center',
+                                minor=True)
+        ax_hmap.set_yticklabels(rowlabels,
+                                rotation='horizontal',
+                                size=(labelsize-4),
+                                multialignment='left',
+                                ha='right',
+                                minor=True)
 
         ax_color = fig.add_axes(cbar_loc)
-        colorbar(im, cax=ax_color, ticks=np.linspace(minDist, maxDist, 10), \
+        plt.colorbar(im, cax=ax_color, ticks=np.linspace(minDist, maxDist, 10),
                      format="%0.1f")
         ax_color.tick_params(labelsize=labelsize)
@@ -1756,7 +1783,7 @@ def plot(self, filename=None, linkage='ward', count_sort=False,
         if filename is not None:
             head = self.targetdir + self.datadirs['plots']
             outfile = os.path.join(head, filename)
-            savefig(outfile, dpi=300, bbox_inches='tight')
+            plt.savefig(outfile, dpi=300, bbox_inches='tight')
 
         return Z, dgram, dist_matrix_clus
@@ -1807,28 +1834,6 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \
         .. _seaborn: https://seaborn.pydata.org/
 
         """
-        from matplotlib.pyplot import figure, colorbar, cm, savefig, clf
-
-        try:
-            import seaborn.apionly as sns
-        except ImportError:
-            raise ImportError(
-                """ERROR --- The seaborn package cannot be found!
-
-                The seaborn API could not be imported. Please install it first.
- You can try installing with pip directly from the - internet: - - pip install seaborn - - Alternatively, download the package from - - http://pypi.python.org/pypi/seaborn/ - - and install in the usual manner. - """ - ) - if self.D is None: err_str = "No distance data; do 'PSAnalysis.run(store=True)' first." raise ValueError(err_str) @@ -1843,14 +1848,18 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ dist_matrix_clus = dist_matrix[rowidx,:] dist_matrix_clus = dist_matrix_clus[:,colidx] - clf() + plt.clf() aspect_ratio = 1.25 - fig = figure(figsize=(figsize*aspect_ratio, figsize)) + fig = plt.figure(figsize=(figsize*aspect_ratio, figsize)) ax_hmap = fig.add_subplot(111) - ax_hmap = sns.heatmap(dist_matrix_clus, \ - linewidths=0.25, cmap=cm.YlGn, annot=True, fmt='3.1f', \ - square=True, xticklabels=rowidx, yticklabels=colidx, \ - annot_kws={"size": 7}, ax=ax_hmap) + ax_hmap = sns.heatmap(dist_matrix_clus, + linewidths=0.25, cmap=plt.cm.YlGn, + annot=True, fmt='3.1f', + square=True, + xticklabels=rowidx, + yticklabels=colidx, + annot_kws={"size": 7}, + ax=ax_hmap) # Remove major ticks from both heat map axes for tic in ax_hmap.xaxis.get_major_ticks(): @@ -1868,7 +1877,7 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ if filename is not None: head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) - savefig(outfile, dpi=600, bbox_inches='tight') + plt.savefig(outfile, dpi=600, bbox_inches='tight') return Z, dgram, dist_matrix_clus @@ -1918,27 +1927,6 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ .. _seaborn: https://seaborn.pydata.org/ """ - from matplotlib.pyplot import figure, savefig, tight_layout, clf, show - try: - import seaborn.apionly as sns - except ImportError: - raise ImportError( - """ERROR --- The seaborn package cannot be found! - - The seaborn API could not be imported. Please install it first. - You can try installing with pip directly from the - internet: - - pip install seaborn - - Alternatively, download the package from - - http://pypi.python.org/pypi/seaborn/ - - and install in the usual manner. 
- """ - ) - colors = sns.xkcd_palette(["cherry", "windows blue"]) if self._NN is None: @@ -1949,8 +1937,8 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ sns.set_style('whitegrid') if not multiplot: - clf() - fig = figure(figsize=(figsize*aspect_ratio, figsize)) + plt.clf() + fig = plt.figure(figsize=(figsize*aspect_ratio, figsize)) ax = fig.add_subplot(111) nn_dist_P, nn_dist_Q = self._NN[idx]['distances'] @@ -1968,12 +1956,12 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ ax.tick_params(axis='both', which='major', labelsize=12, pad=4) sns.despine(bottom=True, left=True, ax=ax) - tight_layout() + plt.tight_layout() if filename is not None: head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) - savefig(outfile, dpi=300, bbox_inches='tight') + plt.savefig(outfile, dpi=300, bbox_inches='tight') return ax diff --git a/package/MDAnalysis/lib/lazy.py b/package/MDAnalysis/lib/lazy.py new file mode 100644 index 00000000000..eaaff6f9b5e --- /dev/null +++ b/package/MDAnalysis/lib/lazy.py @@ -0,0 +1,271 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- http://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +# This module was based on code from the importing module from the PEAK +# package (see http://peak.telecommunity.com/DevCenter/FrontPage). The PEAK +# package is released under the following license, reproduced here: +# +# Copyright (C) 1996-2004 by Phillip J. Eby and Tyler C. Sarna. +# All rights reserved. This software may be used under the same terms +# as Zope or Python. THERE ARE ABSOLUTELY NO WARRANTIES OF ANY KIND. +# Code quality varies between modules, from "beta" to "experimental +# pre-alpha". :) +# +# The following list summarizes the modifications to the importing code: +# - a replacement of lazyModule (import_module, which defers most work to +# _import_module) is implemented that uses an alternative LazyModule class; +# - a different LazyModule class is created per instance, so that reverting +# the __getattribute__ behavior can be done safely; +# - a function to lazily import module functions was added. + + +""" +Lazy module loading --- :mod:`MDAnalysis.lib.lazy` +==================================================== + +Functions and classes for lazy module loading that also delay import errors. +Heavily borrowed from the `importing`_ module, which is not very +subclass-friendly. + +.. versionadded:: 0.16.2 +.. _`importing`: http://peak.telecommunity.com/DevCenter/Importing + +Files and directories +--------------------- + +.. autofunction:: import_module +.. 
autofunction:: import_function
+
+"""
+
+__all__ = ['import_module', 'import_function']
+
+from types import ModuleType
+import sys
+import imp
+
+from six.moves import reload_module
+
+_MSG = ("{0} attempted to use a functionality that requires module {1}, but "
+        "it couldn't be loaded. Please install {2} and retry.")
+
+_MSG_FN = ("{0} attempted to use a functionality that requires function {1} "
+           "of module {2}, but it couldn't be found in that module. Please "
+           "install a version of {2} that has {1} and retry.")
+
+class LazyModule(ModuleType):
+    # peak.util.imports sets __slots__ to (), but it seems pointless because
+    # the base ModuleType doesn't itself set __slots__.
+
+    def __init__(self, modname):
+        super(ModuleType, self).__setattr__('__name__', modname)
+
+    def __getattribute__(self, attr):
+        # Any attribute access triggers the actual import.
+        _load_module(self)
+        return ModuleType.__getattribute__(self, attr)
+
+    def __setattr__(self, attr, value):
+        _load_module(self)
+        return ModuleType.__setattr__(self, attr, value)
+
+def _load_module(module):
+    modclass = type(module)
+    # We only take care of our own LazyModule instances
+    if not issubclass(modclass, LazyModule):
+        return
+    imp.acquire_lock()
+    try:
+        modclass.__getattribute__ = ModuleType.__getattribute__
+        modclass.__setattr__ = ModuleType.__setattr__
+        try:
+            # Already-loaded _LazyModule classes lose their
+            # _mda_lazy_caller_name attr. No need to redo
+            # those cases.
+            caller_name = modclass._mda_lazy_caller_name
+        except AttributeError:
+            return
+        del modclass._mda_lazy_caller_name
+        # First, ensure the parent is loaded
+        # (using recursion; negligible chance we'll ever hit a stack limit
+        # in this case).
+        parent, _, modname = module.__name__.rpartition('.')
+        if parent:
+            _load_module(sys.modules[parent])
+            setattr(sys.modules[parent], modname, module)
+        # Get Python to do the real import!
+        try:
+            reload_module(module)
+        except:
+            # Re-arm the lazy machinery so that a later access retries.
+            del modclass.__getattribute__
+            del modclass.__setattr__
+            modclass._mda_lazy_caller_name = caller_name
+            raise
+    except ImportError:
+        modname = ModuleType.__getattribute__(module, '__name__')
+        base_modname = modname.split(".")[0]
+        raise ImportError(_MSG.format(caller_name, modname, base_modname))
+    finally:
+        imp.release_lock()
+
+def _caller_name(depth=2):
+    # the presence of sys._getframe might be implementation-dependent.
+    # It isn't that serious if we can't get the caller's name.
+    try:
+        return sys._getframe(depth).f_globals['__name__']
+    except AttributeError:
+        return 'MDAnalysis'
+
+def import_module(modname, level='leaf'):
+    """Function allowing lazy importing of a module into the namespace
+
+    Parameters
+    ----------
+    modname : str
+        The module to import.
+    level : str, optional
+        Which submodule reference to return. Either a reference to the 'leaf'
+        module (the default) or to the 'base' module.
+
+        For 'base'::
+
+            MDAnalysis = import_module("MDAnalysis.analysis.distances",
+                                       level='base')
+            # 'MDAnalysis' becomes defined in the current namespace, with
+            # (sub)attributes 'MDAnalysis.analysis' and
+            # 'MDAnalysis.analysis.distances'.
+            # Equivalent to:
+            import MDAnalysis.analysis.distances
+
+        For 'leaf'::
+
+            distances = import_module("MDAnalysis.analysis.distances",
+                                      level='leaf')
+            # Only 'distances' becomes set in the current namespace.
+            # Equivalent to:
+            from MDAnalysis.analysis import distances
+
+    Returns
+    -------
+    module
+        The module specified by *modname*, or its base, depending on *level*.
+        The module isn't immediately imported. Instead, a
+        :class:`MDAnalysis.lib.lazy.LazyModule` instance is returned. Upon
+        access to any of its attributes, the module is finally loaded.
+
+    .. versionadded:: 0.16.2
+
+    """
+    mod = _import_module(modname, _caller_name())
+    if level == 'base':
+        return sys.modules[modname.split('.')[0]]
+    elif level == 'leaf':
+        return mod
+    else:
+        raise ValueError("Parameter 'level' must be one of ('base', 'leaf')")
+
+def _import_module(modname, caller_name):
+    imp.acquire_lock()
+    try:
+        fullmodname = modname
+        fullsubmodname = None
+        # ensure parent module/package is in sys.modules
+        # and parent.modname=module, as soon as the parent is imported
+        while modname:
+            try:
+                mod = sys.modules[modname]
+                # We reached a (base) module that's already loaded. Let's stop
+                # the cycle.
+                modname = ''
+            except KeyError:
+                class _LazyModule(LazyModule):
+                    _mda_lazy_caller_name = caller_name
+                mod = sys.modules[modname] = _LazyModule(modname)
+                if fullsubmodname:
+                    ModuleType.__setattr__(mod, submodname,
+                                           sys.modules[fullsubmodname])
+            fullsubmodname = modname
+            modname, _, submodname = modname.rpartition('.')
+        return sys.modules[fullmodname]
+    finally:
+        imp.release_lock()
+
+def import_function(modname, *funcnames):
+    """Function allowing lazy importing of a function into the namespace
+
+    Parameters
+    ----------
+    modname : str
+        The base module from where to import the function(s) in *funcnames*,
+        or a full 'module_name.function_name' string.
+    funcnames : str (optional)
+        The function name(s) to import from the module specified by *modname*.
+        If left empty, *modname* is assumed to also include the function name
+        to import.
+
+    Returns
+    -------
+    function or list of functions
+        If *funcnames* is passed, a list of imported functions -- one for each
+        element in *funcnames* -- is returned.
+        If only *modname* is passed, it is assumed to be a full
+        'module_name.function_name' string, in which case the imported
+        function is returned directly, and not in a list.
+        The module specified by *modname* is always imported lazily, via
+        :func:`MDAnalysis.lib.lazy.import_module`.
+
+    See Also
+    --------
+    :func:`MDAnalysis.lib.lazy.import_module`
+
+    .. versionadded:: 0.16.2
+
+    """
+    if not funcnames:
+        # We allow passing a single string as 'modname.funcname',
+        # in which case the function is returned directly and not as a list.
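+        # For example, as used elsewhere in this patch:
+        #     leastsq = import_function('scipy.optimize.leastsq')
+        # returns a single lazily resolved callable, whereas a
+        # (hypothetical) multi-name call
+        #     curve_fit, leastsq = import_function('scipy.optimize',
+        #                                          'curve_fit', 'leastsq')
+        # would return a list of two such callables.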
+        modname, funcname = modname.rsplit(".", 1)
+        return _import_function(modname, funcname, _caller_name())
+    else:
+        caller_name = _caller_name()
+        return [_import_function(modname, fn, caller_name)
+                for fn in funcnames]
+
+def _import_function(modname, funcname, caller_name):
+    module = _import_module(modname, caller_name)
+
+    def retfun(*args, **kwargs):
+        try:
+            return getattr(module, funcname)(*args, **kwargs)
+        except AttributeError:
+            raise AttributeError(_MSG_FN.format(caller_name, funcname,
+                                                modname))
+    return retfun
+
diff --git a/package/MDAnalysis/visualization/streamlines.py b/package/MDAnalysis/visualization/streamlines.py
index c8a81b6e299..281c751728b 100644
--- a/package/MDAnalysis/visualization/streamlines.py
+++ b/package/MDAnalysis/visualization/streamlines.py
@@ -30,7 +30,7 @@
 The :func:`generate_streamlines` function can generate a 2D flow field from a
 MD trajectory, for instance, lipid molecules in a flat membrane. It can make
-use of multiple cores to perform the analyis in parallel (using
+use of multiple cores to perform the analysis in parallel (using
 :mod:`multiprocessing`).
 
 See Also
@@ -47,19 +47,11 @@
 import multiprocessing
 
 import numpy as np
-import scipy
-
-try:
-    import matplotlib
-    import matplotlib.path
-except ImportError:
-    raise ImportError(
-        '2d streamplot module requires: matplotlib.path for its path.Path.contains_points method. The installation '
-        'instructions for the matplotlib module can be found here: '
-        'http://matplotlib.org/faq/installing_faq.html?highlight=install')
-
 import MDAnalysis
 
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+matplotlib = lazy.import_module('matplotlib.path', level='base')
 
 def produce_grid(tuple_of_limits, grid_spacing):
diff --git a/package/MDAnalysis/visualization/streamlines_3D.py b/package/MDAnalysis/visualization/streamlines_3D.py
index c735b15dfdd..e713cb93fdd 100644
--- a/package/MDAnalysis/visualization/streamlines_3D.py
+++ b/package/MDAnalysis/visualization/streamlines_3D.py
@@ -50,11 +50,14 @@
 import numpy as np
 import numpy.testing
-import scipy
-import scipy.spatial.distance
 
 import MDAnalysis
 
+# Optional and/or lazily loaded modules
+from MDAnalysis.lib import lazy
+scipy = lazy.import_module('scipy.spatial.distance', level='base')
+
+
 def determine_container_limits(topology_file_path, trajectory_file_path,
                                buffer_value):
     """Calculate the extent of the atom coordinates + buffer.
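Across the call sites above, the `level` keyword picks between the two classic import forms. A short usage sketch, with names taken from this patch's hunks::

    from MDAnalysis.lib import lazy

    # level='base' stands in for 'import scipy.spatial.distance': the bound
    # name is the base package, and the leaf module remains reachable
    # through the attribute chain once loaded.
    scipy = lazy.import_module('scipy.spatial.distance', level='base')

    # level='leaf' (the default) stands in for
    # 'from matplotlib import pyplot': only the leaf module is bound.
    plt = lazy.import_module('matplotlib.pyplot')

    # Both names are LazyModule placeholders; the first attribute access
    # triggers the real import:
    d = scipy.spatial.distance.euclidean((0, 0, 0), (3, 4, 0))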
diff --git a/testsuite/MDAnalysisTests/analysis/test_distances.py b/testsuite/MDAnalysisTests/analysis/test_distances.py index d2b422f6aa5..7b176076f90 100644 --- a/testsuite/MDAnalysisTests/analysis/test_distances.py +++ b/testsuite/MDAnalysisTests/analysis/test_distances.py @@ -27,7 +27,6 @@ import MDAnalysis from MDAnalysisTests import module_not_found from MDAnalysisTests.datafiles import GRO -from MDAnalysisTests.util import block_import import MDAnalysis.analysis.distances diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index aff0ff6f11d..a6a67883f69 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -30,11 +30,10 @@ import sys import warnings -from numpy.testing import (TestCase, dec, assert_equal, assert_almost_equal, - assert_warns) +from numpy.testing import (TestCase, dec, assert_equal, assert_almost_equal) from MDAnalysisTests.datafiles import DCD, DCD2, PSF, TPR, XTC -from MDAnalysisTests import parser_not_found, module_not_found, block_import +from MDAnalysisTests import parser_not_found, module_not_found import MDAnalysis.analysis.rms as rms import MDAnalysis.analysis.align as align @@ -824,22 +823,3 @@ def test_get_distance_matrix(self): # Issue #1324 u = mda.Universe(TPR,XTC) dm = confdistmatrix.get_distance_matrix(u) - -class TestEncoreImportWarnings(object): - def setUp(self): - # clear cache of encore module - for mod in list(sys.modules): # list as we're changing as we iterate - if 'encore' in mod: - sys.modules.pop(mod, None) - - @block_import('sklearn') - def _check_sklearn_import_warns(self, package): - warnings.simplefilter('always') - assert_warns(ImportWarning, importlib.import_module, package) - - def test_import_warnings(self): - for pkg in ( - 'MDAnalysis.analysis.encore.dimensionality_reduction.DimensionalityReductionMethod', - 'MDAnalysis.analysis.encore.clustering.ClusteringMethod', - ): - yield self._check_sklearn_import_warns, pkg
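The deleted TestEncoreImportWarnings class tracked the old behavior, in which a blocked scikit-learn import raised an ImportWarning as soon as an encore submodule was imported. Under the lazy scheme the import itself stays silent and the failure is deferred to first use, roughly as follows (assuming scikit-learn is absent)::

    >>> from MDAnalysis.analysis.encore.clustering import ClusteringMethod
    >>> # no warning yet; sklearn has not been touched
    >>> km = ClusteringMethod.KMeans(n_clusters=2)  # first sklearn access
    Traceback (most recent call last):
        ...
    ImportError: MDAnalysis.analysis.encore.clustering.ClusteringMethod
    attempted to use a functionality that requires module sklearn, but it
    couldn't be loaded. Please install sklearn and retry.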