From dcbae273cb1199a93bfec411af964a1183d43d75 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 15 Jun 2017 19:37:48 -0700 Subject: [PATCH] scipy and matplotlib are imported at top in analysis - updated all modules - removed any code that guards against scipy or matplotlib import - conforms to style guide https://github.com/MDAnalysis/mdanalysis/wiki/Style-Guide#module-imports-in-mdanalysisanalysis - fixes #1159 - fixes #1361 --- package/MDAnalysis/analysis/distances.py | 23 +---- .../MDAnalysis/analysis/encore/similarity.py | 36 ++----- .../analysis/hbonds/hbond_autocorrel.py | 25 ++--- package/MDAnalysis/analysis/hole.py | 16 ++- package/MDAnalysis/analysis/legacy/x3dna.py | 5 +- package/MDAnalysis/analysis/pca.py | 7 +- package/MDAnalysis/analysis/polymer.py | 7 +- package/MDAnalysis/analysis/psa.py | 99 ++++++++++++++----- 8 files changed, 114 insertions(+), 104 deletions(-) diff --git a/package/MDAnalysis/analysis/distances.py b/package/MDAnalysis/analysis/distances.py index ae4ad699141..d524a993625 100644 --- a/package/MDAnalysis/analysis/distances.py +++ b/package/MDAnalysis/analysis/distances.py @@ -42,6 +42,7 @@ 'contact_matrix', 'dist', 'between'] import numpy as np +import scipy.sparse from MDAnalysis.lib.distances import distance_array, self_distance_array from MDAnalysis.lib.c_distances import contact_matrix_no_pbc, contact_matrix_pbc @@ -51,15 +52,6 @@ import logging logger = logging.getLogger("MDAnalysis.analysis.distances") -try: - from scipy import sparse -except ImportError: - sparse = None - msg = "scipy.sparse could not be imported: some functionality will " \ - "not be available in contact_matrix()" - warnings.warn(msg, category=ImportWarning) - logger.warn(msg) - del msg def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): '''Calculates a matrix of contacts. @@ -93,12 +85,6 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): The contact matrix is returned in a format determined by the `returntype` keyword. - - Note - ---- - :mod:`scipy.sparse` is require for using *sparse* matrices; if it cannot - be imported then an `ImportError` is raised. - See Also -------- :mod:`MDAnalysis.analysis.contacts` for native contact analysis @@ -112,14 +98,9 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): adj = (distance_array(coord, coord, box=box) < cutoff) return adj elif returntype == "sparse": - if sparse is None: - # hack: if we are running with minimal dependencies then scipy was - # not imported and we have to bail here (see scipy import at top) - raise ImportError("For sparse matrix functionality you need to " - "import scipy.") # Initialize square List of Lists matrix of dimensions equal to number # of coordinates passed - sparse_contacts = sparse.lil_matrix((len(coord), len(coord)), dtype='bool') + sparse_contacts = scipy.sparse.lil_matrix((len(coord), len(coord)), dtype='bool') if box is not None: # with PBC contact_matrix_pbc(coord, sparse_contacts, box, cutoff) diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index bd7e9e5e0ea..a56b3c0c1e5 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -172,21 +172,13 @@ from __future__ import print_function, division, absolute_import from six.moves import range, zip -import MDAnalysis as mda -import numpy as np import warnings import logging -try: - from scipy.stats import gaussian_kde -except ImportError: - gaussian_kde = None - msg = "scipy.stats.gaussian_kde could not be imported. " \ - "Dimensionality reduction ensemble comparisons will not " \ - "be available." - warnings.warn(msg, - category=ImportWarning) - logging.warn(msg) - del msg + +import numpy as np +import scipy.stats + +import MDAnalysis as mda from ...coordinates.memory import MemoryReader from .confdistmatrix import get_distance_matrix @@ -460,18 +452,11 @@ def gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, embedded_ensembles = [] resamples = [] - if gaussian_kde is None: - # hack: if we are running with minimal dependencies then scipy was - # not imported and we have to bail here (see scipy import at top) - raise ImportError("For Kernel Density Estimation functionality you" - "need to import scipy") - for i in range(1, nensembles + 1): this_embedded = embedded_space.transpose()[ np.where(np.array(ensemble_assignment) == i)].transpose() embedded_ensembles.append(this_embedded) - kdes.append(gaussian_kde( - this_embedded)) + kdes.append(scipy.stats.gaussian_kde(this_embedded)) # # Set number of samples # if not nsamples: @@ -623,12 +608,6 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, """ - if gaussian_kde is None: - # hack: if we are running with minimal dependencies then scipy was - # not imported and we have to bail here (see scipy import at top) - raise ImportError("For Kernel Density Estimation functionality you" - "need to import scipy") - kdes = [] embedded_ensembles = [] resamples = [] @@ -639,8 +618,7 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, np.logical_and(ensemble_assignment >= ens_id_min, ensemble_assignment <= i))].transpose() embedded_ensembles.append(this_embedded) - kdes.append( - gaussian_kde(this_embedded)) + kdes.append(scipy.stats.gaussian_kde(this_embedded)) # Resample according to probability distributions for this_kde in kdes: diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index a0bf6bb5451..bf81e3529c5 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -155,6 +155,8 @@ from __future__ import division, absolute_import from six.moves import zip import numpy as np +import scipy.optimize + import warnings from MDAnalysis.lib.log import ProgressMeter @@ -162,7 +164,7 @@ class HydrogenBondAutoCorrel(object): - """Perform a time autocorrelation of the hydrogen bonds in the system. + """Perform a time autocorrelation of the hydrogen bonds in the system. Parameters ---------- @@ -421,8 +423,9 @@ def solve(self, p_guess=None): Initial guess for the leastsq fit, must match the shape of the expected coefficients - Continuous defition results are fitted to a double exponential, - intermittent definition are fit to a triple exponential. + Continuous defition results are fitted to a double exponential with + :func:`scipy.optimize.leastsq`, intermittent definition are fit to a + triple exponential. The results of this fitting procedure are saved into the *fit*, *tau* and *estimate* keywords in the solution dict. @@ -434,14 +437,14 @@ def solve(self, p_guess=None): - *estimate* contains the estimate provided by the fit of the time autocorrelation function - In addition, the output of the leastsq function is saved into the - solution dict + In addition, the output of the :func:`~scipy.optimize.leastsq` function + is saved into the solution dict - *infodict* - *mesg* - *ier* + """ - from scipy.optimize import leastsq if self.solution['results'] is None: raise ValueError( @@ -498,9 +501,8 @@ def triple(x, A1, A2, tau1, tau2, tau3): if p_guess is None: p_guess = (0.5, 10 * self.sample_time, self.sample_time) - p, cov, infodict, mesg, ier = leastsq(err, p_guess, - args=(time, results), - full_output=True) + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) self.solution['fit'] = p A1, tau1, tau2 = p A2 = 1 - A1 @@ -512,9 +514,8 @@ def triple(x, A1, A2, tau1, tau2, tau3): p_guess = (0.33, 0.33, 10 * self.sample_time, self.sample_time, 0.1 * self.sample_time) - p, cov, infodict, mesg, ier = leastsq(err, p_guess, - args=(time, results), - full_output=True) + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) self.solution['fit'] = p A1, A2, tau1, tau2, tau3 = p A3 = 1 - A1 - A2 diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py index e43316a868c..7088e5f8a7c 100644 --- a/package/MDAnalysis/analysis/hole.py +++ b/package/MDAnalysis/analysis/hole.py @@ -245,7 +245,6 @@ from six.moves import zip, cPickle import six -import numpy as np import glob import os import errno @@ -258,6 +257,10 @@ import logging from itertools import cycle +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + from MDAnalysis import Universe from MDAnalysis.exceptions import ApplicationError from MDAnalysis.lib.util import which, realpath, asiterable @@ -370,8 +373,6 @@ def save(self, filename="hole.pickle"): cPickle.dump(self.profiles, open(filename, "wb"), cPickle.HIGHEST_PROTOCOL) def _process_plot_kwargs(self, kwargs): - import matplotlib.colors - kw = {} frames = kwargs.pop('frames', None) if frames is None: @@ -448,9 +449,6 @@ def plot(self, **kwargs): Returns ``ax``. """ - - import matplotlib.pyplot as plt - kw, kwargs = self._process_plot_kwargs(kwargs) ax = kwargs.pop('ax', None) @@ -517,8 +515,7 @@ def plot3D(self, **kwargs): Returns ``ax``. """ - - import matplotlib.pyplot as plt + # installed with matplotlib; imported here to enable 3D axes from mpl_toolkits.mplot3d import Axes3D kw, kwargs = self._process_plot_kwargs(kwargs) @@ -540,8 +537,7 @@ def plot3D(self, **kwargs): rxncoord = profile.rxncoord else: # does not seem to work with masked arrays but with nan hack! - # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value - # -for-the-z-axis + # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value-for-the-z-axis #radius = np.ma.masked_greater(profile.radius, rmax) #rxncoord = np.ma.array(profile.rxncoord, mask=radius.mask) rxncoord = profile.rxncoord diff --git a/package/MDAnalysis/analysis/legacy/x3dna.py b/package/MDAnalysis/analysis/legacy/x3dna.py index f960cd1fc5d..99cb4a8fc65 100644 --- a/package/MDAnalysis/analysis/legacy/x3dna.py +++ b/package/MDAnalysis/analysis/legacy/x3dna.py @@ -132,13 +132,15 @@ import errno import shutil import warnings -import numpy as np import os.path import subprocess import tempfile import textwrap from collections import OrderedDict +import numpy as np +import matplotlib.pyplot as plt + from MDAnalysis import ApplicationError from MDAnalysis.lib.util import which, realpath, asiterable @@ -413,7 +415,6 @@ def plot(self, **kwargs): Provide `ax` to have all plots plotted in the same axes. """ - import matplotlib.pyplot as plt na_avg, na_std = self.mean_std() for k in range(len(na_avg[0])): diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 18d729ecc08..b9920a27ed3 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -106,6 +106,7 @@ import warnings import numpy as np +import scipy.integrate from MDAnalysis import Universe from MDAnalysis.analysis.align import _fit_to @@ -357,9 +358,9 @@ def cosine_content(pca_space, i): .. [BerkHess1] Berk Hess. Convergence of sampling in protein simulations. Phys. Rev. E 65, 031910 (2002). """ - from scipy.integrate import simps + t = np.arange(len(pca_space)) T = len(pca_space) cos = np.cos(np.pi * t * (i + 1) / T) - return ((2.0 / T) * (simps(cos*pca_space[:, i])) ** 2 / - simps(pca_space[:, i] ** 2)) + return ((2.0 / T) * (scipy.integrate.simps(cos*pca_space[:, i])) ** 2 / + scipy.integrate.simps(pca_space[:, i] ** 2)) diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index 645055e1878..b7babb204ab 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -36,6 +36,8 @@ from six.moves import range import numpy as np +import scipy.optimize + import logging from .. import NoDataError @@ -165,13 +167,10 @@ def fit_exponential_decay(x, y): ----- This function assumes that data starts at 1.0 and decays to 0.0 - Requires scipy """ - from scipy.optimize import curve_fit - def expfunc(x, a): return np.exp(-x/a) - a = curve_fit(expfunc, x, y)[0][0] + a = scipy.optimize.curve_fit(expfunc, x, y)[0][0] return a diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py index 6c002241a40..f7178d678c1 100644 --- a/package/MDAnalysis/analysis/psa.py +++ b/package/MDAnalysis/analysis/psa.py @@ -216,7 +216,11 @@ from six.moves import range, cPickle import numpy as np -import warnings,numbers +from scipy import spatial, cluster +import matplotlib + +import warnings +import numbers import MDAnalysis import MDAnalysis.analysis.align @@ -396,13 +400,14 @@ def hausdorff(P, Q): Notes ----- - - The Hausdorff distance is calculated in a brute force manner from the - distance matrix without further optimizations, essentially following - [Huttenlocher1993]_. - - :func:`scipy.spatial.distance.directed_hausdorff` is an optimized - implementation of the early break algorithm of [Taha2015]_; note that - one still has to calculate the *symmetric* Hausdorff distance as - `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`. + The Hausdorff distance is calculated in a brute force manner from the + distance matrix without further optimizations, essentially following + [Huttenlocher1993]_. + + :func:`scipy.spatial.distance.directed_hausdorff` is an optimized + implementation of the early break algorithm of [Taha2015]_; note that one + still has to calculate the *symmetric* Hausdorff distance as + `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`. References ---------- @@ -415,6 +420,10 @@ def hausdorff(P, Q): calculating the exact Hausdorff distance. IEEE Transactions On Pattern Analysis And Machine Intelligence, 37:2153-63, 2015. + SeeAlso + ------- + scipy.spatial.distance.directed_hausdorff + """ N, axis = get_coord_axes(P) d = get_msd_matrix(P, Q, axis=axis) @@ -1650,7 +1659,7 @@ def plot(self, filename=None, linkage='ward', count_sort=False, If `filename` is supplied then the figure is also written to file (the suffix determines the file type, e.g. pdf, png, eps, ...). All other - keyword arguments are passed on to :func:`matplotlib.pyplot.imshow`. + keyword arguments are passed on to :func:`matplotlib.pyplot.matshow`. Parameters @@ -1669,6 +1678,15 @@ def plot(self, filename=None, linkage='ward', count_sort=False, set the font size for colorbar labels; font size for path labels on dendrogram default to 3 points smaller [``12``] + Returns + ------- + Z + `Z` from :meth:`cluster` + dgram + `dgram` from :meth:`cluster` + dist_matrix_clus + clustered distance matrix (reordered) + """ from matplotlib.pyplot import figure, colorbar, cm, savefig, clf @@ -1770,6 +1788,23 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ annot_size : float font size of annotation labels on heat map [``6.5``] + Returns + ------- + Z + `Z` from :meth:`cluster` + dgram + `dgram` from :meth:`cluster` + dist_matrix_clus + clustered distance matrix (reordered) + + + Note + ---- + This function requires the seaborn_ package, which can be installed + with `pip install seaborn` or `conda install seaborn`. + + .. _seaborn: https://seaborn.pydata.org/ + """ from matplotlib.pyplot import figure, colorbar, cm, savefig, clf @@ -1870,6 +1905,17 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ set the font size for colorbar labels; font size for path labels on dendrogram default to 3 points smaller [``12``] + Returns + ------- + ax : axes + + Note + ---- + This function requires the seaborn_ package, which can be installed + with `pip install seaborn` or `conda install seaborn`. + + .. _seaborn: https://seaborn.pydata.org/ + """ from matplotlib.pyplot import figure, savefig, tight_layout, clf, show try: @@ -1927,7 +1973,8 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) savefig(outfile, dpi=300, bbox_inches='tight') - show() + + return ax def cluster(self, distArray, method='ward', count_sort=False, \ @@ -1955,22 +2002,28 @@ def cluster(self, distArray, method='ward', count_sort=False, \ Returns ------- - list + Z + output from :func:`scipy.cluster.hierarchy.linkage`; list of indices representing the row-wise order of the objects after clustering + dgram + output from :func:`scipy.cluster.hierarchy.dendrogram` """ - import matplotlib - from scipy.cluster.hierarchy import linkage, dendrogram - + # perhaps there is a better way to manipulate the plot... or perhaps it + # is not even necessary? In any case, the try/finally makes sure that + # we are not permanently changing the user's global state + orig_linewidth = matplotlib.rcParams['lines.linewidth'] matplotlib.rcParams['lines.linewidth'] = 0.5 - - Z = linkage(distArray, method=method) - dgram = dendrogram(Z, no_labels=no_labels, orientation='left', \ - count_sort=count_sort, distance_sort=distance_sort, \ - no_plot=no_plot, color_threshold=color_threshold) + try: + Z = cluster.hierarchy.linkage(distArray, method=method) + dgram = cluster.hierarchy.dendrogram( + Z, no_labels=no_labels, orientation='left', + count_sort=count_sort, distance_sort=distance_sort, + no_plot=no_plot, color_threshold=color_threshold) + finally: + matplotlib.rcParams['lines.linewidth'] = orig_linewidth return Z, dgram - def _get_plot_obj_locs(self): """Find and return coordinates for dendrogram, heat map, and colorbar. @@ -2005,7 +2058,8 @@ def get_num_atoms(self): Returns ------- - the number of atoms + int + the number of atoms Note ---- @@ -2077,8 +2131,7 @@ def get_pairwise_distances(self, vectorform=False): err_str = "No distance data; do 'PSAnalysis.run(store=True)' first." raise ValueError(err_str) if vectorform: - from scipy.spatial.distance import squareform - return squareform(self.D) + return spatial.distance.squareform(self.D) else: return self.D