-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uncorrelation #5
Changes from 4 commits
7a89b15
970bef2
d7840cb
efe0884
3b4cbe9
5c41cc2
639a5d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import pandas as pd | ||
|
||
|
||
def get_available_plugin_ids(type): | ||
|
@@ -98,14 +99,15 @@ def main(): | |
parser.add_argument('-q', '--suffix', dest='suffix', help='Suffix for datafile sets, i.e. \'xvg\' (default).', default='xvg') | ||
parser.add_argument('-e', dest='estimators', type=str, default=None, help="Comma separated Estimator methods") | ||
parser.add_argument('-n', '--uncorr', dest='uncorr', help='The observable to be used for the autocorrelation analysis; either \'dhdl_all\' (obtained as a sum over all energy components) or \'dhdl\' (obtained as a sum over those energy components that are changing; default) or \'dE\'. In the latter case the energy differences dE_{i,i+1} (dE_{i,i-1} for the last lambda) are used.', default='dhdl') | ||
parser.add_argument('-i', '--uncorr_threshold', dest='uncorr_threshold', help='Proceed with correlated samples (N) if the number of uncorrelated samples (N_k) is found to be less than this number. If 0 is given, the time series analysis will not be performed at all. Default: 50.', default=50, type=int) | ||
parser.add_argument('-r', '--decimal', dest='decimal', help='The number of decimal places the free energies are to be reported with. No worries, this is for the text output only; the full-precision data will be stored in \'results.pickle\'. Default: 3.', default=3, type=int) | ||
parser.add_argument('-o', '--output', dest='output', type=str, default=None, help="Output methods") | ||
parser.add_argument('-a', '--software', dest='software', help='Package\'s name the data files come from: Gromacs, Sire, Desmond, or AMBER. Default: Gromacs.', default='Gromacs') | ||
parser.add_argument('-s', '--skiptime', dest='equiltime', help='Discard data prior to this specified time as \'equilibration\' data. Units picoseconds. Default: 0 ps.', default=0, type=float) | ||
args = parser.parse_args() | ||
|
||
parser = load_plugin_by_name('parser', args.software, args.temperature, args.prefix, args.suffix) | ||
uncorrelator = load_plugin_by_name('uncorrelate', args.uncorr) | ||
uncorrelator = load_plugin_by_name('uncorrelate', args.uncorr, args.uncorr_threshold) | ||
outputs = load_plugins('output', argsplit(args.output)) | ||
estimators = load_plugins('estimator', argsplit(args.estimators)) | ||
|
||
|
@@ -127,15 +129,24 @@ def main(): | |
u_nks = parser.get_u_nks() | ||
|
||
# Step 2: Uncorrelate the data | ||
if uncorrelator.needs_dhdls: | ||
uncorrelator.set_dhdls(dhdls) | ||
if uncorrelator.needs_u_nks: | ||
uncorrelator.set_u_nks(u_nks) | ||
|
||
if do_dhdl: | ||
dhdls = uncorrelator.uncorrelate(dhdls, args.equiltime) | ||
if do_u_nks: | ||
u_nks = uncorrelator.uncorrelate(u_nks, args.equiltime) | ||
if args.uncorr_threshold > 0: | ||
if uncorrelator.needs_dhdls: | ||
uncorrelator.set_dhdls(dhdls) | ||
if uncorrelator.needs_u_nks: | ||
uncorrelator.set_u_nks(u_nks) | ||
|
||
if do_dhdl: | ||
print("Uncorrelating dH/dl ...") | ||
dhdls = uncorrelator.uncorrelate(dhdls, args.equiltime) | ||
if do_u_nks: | ||
print("Uncorrelating reduced potentials ...") | ||
u_nks = uncorrelator.uncorrelate(u_nks, args.equiltime) | ||
|
||
# concat data for estimators | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I moved the concatenation out of the correlation analysis functions because a threshold of 0 needs to skip the uncorrelators but still concatenate the data for the estimators. |
||
if u_nks is not None: | ||
u_nks = pd.concat(u_nks) | ||
if dhdls is not None: | ||
dhdls = pd.concat(dhdls) | ||
|
||
# Step 3: Estimate Free energy differences | ||
for estimator in estimators: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,10 @@ class StatisticalInefficiencyDhdl: | |
needs_u_nks = False | ||
|
||
dhdl = None | ||
uncorr_threshold = None | ||
|
||
def __init__(self, uncorr_threshold): | ||
self.uncorr_threshold = uncorr_threshold | ||
|
||
def set_dhdls(self, dhdls): | ||
""" | ||
|
@@ -50,13 +54,22 @@ def uncorrelate(self, dfs, lower): | |
dl.append(dli) | ||
|
||
uncorrelated_dfs = [] | ||
for dhdl_, l, df in zip(self.dhdls, dl, dfs): | ||
print("Number of correlated and uncorrelated samples (Method=%s):\n\n%6s %12s %12s %12s\n" % ("dHdl", "State", "N", "N_k", "N/N_k")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be good to use the str.format() method since we are py3 only. |
||
for idx, (dhdl_, l, df) in enumerate(zip(self.dhdls, dl, dfs)): | ||
ind = np.array(l, dtype=bool) | ||
ind = np.array(ind, dtype=int) | ||
dhdl_sum = dhdl_.dot(ind) | ||
uncorrelated_dfs.append(alchemlyb.preprocessing.statistical_inefficiency(df, dhdl_sum, lower, conservative=False)) | ||
uncorrelated_df = alchemlyb.preprocessing.statistical_inefficiency(df, dhdl_sum, lower, conservative=False) | ||
N, N_k = len(df), len(uncorrelated_df) | ||
g = N/N_k | ||
print("%6s %12s %12s %12.2f" % (idx, N, N_k, g)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be good to use the str.format() method since we are py3 only. |
||
if N_k < self.uncorr_threshold: | ||
print("WARNING: Only %d uncorrelated samples found at lambda number %d; proceeding with analysis using correlated samples..." % (N_k, idx)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It might be better to issue a proper warning (e.g. via the `warnings` module) instead of printing the warning.
|
||
uncorrelated_dfs.append(df) | ||
else: | ||
uncorrelated_dfs.append(uncorrelated_df) | ||
|
||
return pandas.concat(uncorrelated_dfs) | ||
return uncorrelated_dfs | ||
|
||
|
||
def get_plugin(*args): | ||
|
@@ -65,4 +78,4 @@ def get_plugin(*args): | |
:return: | ||
Statitical inefficiency uncorrelator | ||
""" | ||
return StatisticalInefficiencyDhdl() | ||
return StatisticalInefficiencyDhdl(*args) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,10 @@ class StatisticalInefficiencyDhdlAll: | |
needs_u_nks = False | ||
|
||
dhdl = None | ||
uncorr_threshold = None | ||
|
||
def __init__(self, uncorr_threshold): | ||
self.uncorr_threshold = uncorr_threshold | ||
|
||
def set_dhdls(self, dhdls): | ||
""" | ||
|
@@ -29,11 +33,20 @@ def uncorrelate(self, dfs, lower): | |
""" | ||
|
||
uncorrelated_dfs = [] | ||
for dhdl_, df in zip(self.dhdls, dfs): | ||
print("Number of correlated and uncorrelated samples (Method=%s):\n\n%6s %12s %12s %12s\n" % ("dHdl (all)", "State", "N", "N_k", "N/N_k")) | ||
for idx, (dhdl_, df) in enumerate(zip(self.dhdls, dfs)): | ||
dhdl_sum = dhdl_.sum(axis=1) | ||
uncorrelated_dfs.append(alchemlyb.preprocessing.statistical_inefficiency(df, dhdl_sum, lower, conservative=False)) | ||
uncorrelated_df = alchemlyb.preprocessing.statistical_inefficiency(df, dhdl_sum, lower, conservative=False) | ||
N, N_k = len(df), len(uncorrelated_df) | ||
g = N/N_k | ||
print("%6s %12s %12s %12.2f" % (idx, N, N_k, g)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might be good to use the str.format() method since we are py3 only. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point. I tried to keep the style similar to the rest of the repo, which still uses the % syntax, but will happily change it. |
||
if N_k < self.uncorr_threshold: | ||
print("WARNING: Only %d uncorrelated samples found at lambda number %d; proceeding with analysis using correlated samples..." % (N_k, idx)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be good to make a base class that does this check to avoid code duplication. |
||
uncorrelated_dfs.append(df) | ||
else: | ||
uncorrelated_dfs.append(uncorrelated_df) | ||
|
||
return pandas.concat(uncorrelated_dfs) | ||
return uncorrelated_dfs | ||
|
||
|
||
def get_plugin(*args): | ||
|
@@ -42,4 +55,4 @@ def get_plugin(*args): | |
:return: | ||
Statitical inefficiency uncorrelator using a sum of all dhdls | ||
""" | ||
return StatisticalInefficiencyDhdlAll() | ||
return StatisticalInefficiencyDhdlAll(*args) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adapted from alchemical-analysis. `-i` sounds unintuitive to me, but that's what AA is using.