From 2d0e90dd07997dbbe244387cf83ca8a26f95692a Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Mon, 31 Mar 2014 21:38:42 -0700 Subject: [PATCH 01/25] Begin new data table parsing --- src/thermo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/thermo.py b/src/thermo.py index 017586c78..e1f743a55 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,6 +1,7 @@ import os import errno import numpy as np +import pandas as pd from forcebalance.target import Target from forcebalance.finite_difference import in_fd From 9d939ac9c34f4c80f28862a9cbf3904e2efecfff Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Fri, 4 Apr 2014 01:30:40 -0700 Subject: [PATCH 02/25] Implemented tri-format parser (broke Thermo) --- src/parser.py | 3 +- src/thermo.py | 361 ++++++++++++++++-- .../targets/Lipid_MUL/lipidcol2a.txt | 5 + .../targets/Lipid_MUL/scd323.txt | 17 + .../targets/Lipid_MUL/scd333.txt | 17 + .../targets/Lipid_MUL/scd338.txt | 17 + .../targets/Lipid_MUL/scd353.txt | 17 + .../targets/Lipid_RIT/lipidcol1.txt | 68 ++++ .../targets/Lipid_SPC/lipidcol1.txt | 67 ++++ .../targets/Lipid_TAB/lipidcol1.txt | 65 ++++ .../targets/LiquidBromine/expset.txt | 4 +- .../targets/LiquidBromine_CSV/data.csv | 8 + .../targets/LiquidBromine_TAB/data.tab.txt | 8 + .../004_thermo_liquid_bromine/test_parse.in | 145 +++++++ 14 files changed, 773 insertions(+), 29 deletions(-) create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt create mode 100644 
studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv create mode 100644 studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt create mode 100644 studies/004_thermo_liquid_bromine/test_parse.in diff --git a/src/parser.py b/src/parser.py index 95343c86b..529753cfc 100644 --- a/src/parser.py +++ b/src/parser.py @@ -150,7 +150,7 @@ "gmx_top" : (None, -10, 'Gromacs .top files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'), "gmx_ndx" : (None, -10, 'Gromacs .ndx files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'), "tinker_key" : (None, -10, 'TINKER .key files. If not provided, will search for default.', 'Targets that use TINKER', 'TINKER'), - "expdata_txt" : ('expset.txt', 0, 'Text file containing experimental data.', 'Thermodynamic properties target', 'thermo'), + "source" : ('data.txt', 0, 'Text file containing source data (experimental data, parameters for observable models, weights).', 'Thermodynamic properties target', 'thermo'), "read" : (None, 50, 'Provide a temporary directory ".tmp" to read data from a previous calculation on the initial iteration (for instance, to restart an aborted run).', 'Liquid and Remote targets', 'Liquid, Remote'), }, 'allcaps' : {"type" : (None, 200, 'The type of fitting target, for instance AbInitio_GMX ; this must correspond to the name of a Target subclass.', 'All targets (important)' ,''), @@ -281,6 +281,7 @@ "gas_equ_steps" : "gas_eq_steps", "lipid_prod_steps" : "lipid_md_steps", "lipid_equ_steps" : "lipid_eq_steps", + "expdata_txt" : "source", } ## Listing of sections in the input file. 
diff --git a/src/thermo.py b/src/thermo.py index 14cd23629..351f9957f 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,7 +1,11 @@ import os +import re +import csv import errno import numpy as np import pandas as pd +import itertools +import cStringIO from forcebalance.target import Target from forcebalance.finite_difference import in_fd @@ -10,16 +14,284 @@ from forcebalance.nifty import LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary -from collections import OrderedDict +from collections import defaultdict, OrderedDict from forcebalance.output import getLogger logger = getLogger(__name__) -# + +class TextParser(object): + """ Parse a text file. """ + def __init__(self, fnm): + self.fnm = fnm + self.parse() + + def is_empty_line(self): + return all([len(fld.strip()) == 0 for fld in self.fields]) + + def is_comment_line(self): + return re.match('^[\'"]?#',self.fields[0].strip()) + + def process_header(self): + """ Function for setting more attributes using the header line, if needed. """ + self.headings = [i.strip() for i in self.fields[:]] + + def process_data(self): + """ Function for setting more attributes using the current line, if needed. """ + trow = [] + for ifld in range(len(self.headings)): + if ifld < len(self.fields): + trow.append(self.fields[ifld]) + else: + trow.append('') + return trow + + def sanity_check(self): + """ Extra sanity checks. """ + + def parse(self): + self.headings = [] # Fields in header line + meta = defaultdict(list) # Dictionary of metadata + found_header = 0 # Whether we found the header line + table = [] # List of data records + self.generate_splits() # Generate a list of records for each line. + self.ln = 0 # Current line number + for line, fields in zip(open(self.fnm).readlines(), self.splits): + # Set attribute so methods can use it. + self.fields = fields + # Skip over empty lines or comment lines. 
+ if self.is_empty_line(): + logger.info("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) + self.ln += 1 + continue + if self.is_comment_line(): + logger.info("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) + self.ln += 1 + continue + # Indicates metadata mode. + is_meta = 0 + # Indicates whether this is the header line. + is_header = 0 + # Split line by tabs. + for ifld, fld in enumerate(fields): + fld = fld.strip() + # Stop parsing when we encounter a comment line. + if re.match('^[\'"]?#',fld): break + # The first word would contain the name of the metadata key. + if ifld == 0: + mkey = fld + # Check if the first field is an equals sign (turn on metadata mode). + if ifld == 1: + # Activate metadata mode. + if fld == "=": + is_meta = 1 + # Otherwise, this is the header line. + elif not found_header: + is_header = 1 + found_header = 1 + # Read in metadata. + if ifld > 1 and is_meta: + meta[mkey].append(fld) + # Set field start, field end, and field content for the header. + if is_header: + logger.info("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) + self.process_header() + elif is_meta: + logger.info("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) + else: + # Build the row of data to be appended to the table. + # Loop through the fields in the header and inserts fields + # in the data line accordingly. Ignores trailing tabs/spaces. + logger.info("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) + table.append(self.process_data()) + self.ln += 1 + self.sanity_check() + printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) + self.metadata = meta + self.table = table + +class CSV_Parser(TextParser): + + """ + Parse a comma-separated file. This class is for all + source files that are .csv format (characterized by having the + same number of comma-separated fields in each line). Fields are + separated by commas but they may contain commas as well. 
+ + In contrast to the other formats, .csv MUST contain the same + number of commas in each line. .csv format is easily prepared + using Excel. + """ + + def __init__(self, fnm): + self.format = "comma-separated values (csv)" + super(CSV_Parser, self).__init__(fnm) + + def generate_splits(self): + with open(self.fnm, 'r') as f: self.splits = list(csv.reader(f)) + +class TAB_Parser(TextParser): + + """ + Parse a tab-delimited file. This function is called for all + source files that aren't csv and contain at least one tab. + Fields are separated by tabs and do not contain tabs. + + Tab-delimited format is easy to prepare using programs like Excel. + It is easier to read than .csv but represented differently by + different editors. + + Empty fields must still exist (represented using multiple tabs). + """ + + def __init__(self, fnm): + self.format = "tab-delimited text" + super(TAB_Parser, self).__init__(fnm) + + def generate_splits(self): + self.splits = [line.split('\t') for line in open(self.fnm).readlines()] + +class FIX_Parser(TextParser): + + """ + Parse a fixed width format file. This function is called for all + source files that aren't csv and contain no tabs. + + Fixed width is harder to prepare by hand but easiest to read, + because it looks the same in all text editors. The field width is + determined by the header line (first line in the data table), + i.e. the first non-empty, non-comment, non-metadata line. + + Empty fields need to be filled with the correct number of spaces. + All fields must have the same alignment (left or right). The + start and end of each field is determined from the header line and + used to determine alignment. If the alignment cannot be determined + then it will throw an error. 
+ + Example of a left-aligned fixed width file: + + T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 + 323.15 1 0.631 1 C15 C34 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 + + """ + + def __init__(self, fnm): + self.format = "fixed-width text" + self.fbegs_dat = [] + self.fends_dat = [] + super(FIX_Parser, self).__init__(fnm) + + def generate_splits(self): + # This regular expression splits a string looking like this: + # "Density (kg m^-3) Hvap (kJ mol^-1) Alpha Kappa". But I + # don't want to split in these places: "Density_(kg_m^-3) + # Hvap_(kJ_mol^-1) Alpha Kappa" + allfields = [list(re.finditer('[^\s(]+(?:\s*\([^)]*\))?', line)) for line in open(self.fnm).readlines()] + self.splits = [] + # Field start / end positions for each line in the file + self.fbegs = [] + self.fends = [] + for line, fields in zip(open(self.fnm).readlines(), allfields): + self.splits.append([fld.group(0) for fld in fields]) + self.fbegs.append([fld.start() for fld in fields]) + self.fends.append([fld.end() for fld in fields]) + + def process_header(self): + super(FIX_Parser, self).process_header() + # Field start / end positions for the header line + self.hbeg = self.fbegs[self.ln] + self.hend = self.fends[self.ln] + + def process_data(self): + trow = [] + hbeg = self.hbeg + hend = self.hend + fbeg = self.fbegs[self.ln] + fend = self.fends[self.ln] + fields = self.fields + # Check alignment and throw an error if incorrectly formatted. 
+ if not ((set(fbeg).issubset(hbeg)) or (set(fend).issubset(hend))): + logger.error("This \x1b[91mdata line\x1b[0m is not aligned with the \x1b[92mheader line\x1b[0m!\n") + logger.error("\x1b[92m%s\x1b[0m\n" % header.replace('\n','')) + logger.error("\x1b[91m%s\x1b[0m\n" % line.replace('\n','')) + raise RuntimeError + # Left-aligned case + if set(fbeg).issubset(hbeg): + for hpos in hbeg: + if hpos in fbeg: + trow.append(fields[fbeg.index(hpos)]) + else: + trow.append('') + # Right-aligned case + if set(fend).issubset(hend): + for hpos in hend: + if hpos in fend: + trow.append(fields[fend.index(hpos)]) + else: + trow.append('') + # Field start / end positions for the line of data + self.fbegs_dat.append(fbeg[:]) + self.fends_dat.append(fend[:]) + return trow + + def sanity_check(self): + if set(self.hbeg).issuperset(set(itertools.chain(*self.fbegs_dat))): + self.format = "left-aligned fixed width text" + elif set(self.hend).issuperset(set(itertools.chain(*self.fends_dat))): + self.format = "right-aligned fixed width text" + else: + # Sanity check - it should never get here unless the parser is incorrect. + raise RuntimeError("Fixed-width format detected but columns are neither left-aligned nor right-aligned!") + +def parse1(fnm): + + """Determine the format of the source file and call the + appropriate parsing function.""" + + # CSV files have the same number of comma separated fields in every line, they are the simplest to parse. + with open(fnm, 'r') as f: csvf = list(csv.reader(f)) + if len(csvf[0]) > 1 and len(set([len(i) for i in csvf])) == 1: + return CSV_Parser(fnm) + + # Strip away comments and empty lines. + nclines = [re.sub('[ \t]*#.*$','',line) for line in open(fnm).readlines() + if not (line.strip().startswith("#") or not line.strip())] + + # Print the sanitized lines to a new file object. + # Note the file object needs ot be rewound every time we read or write to it. 
+ fdat = cStringIO.StringIO() + for line in nclines: + print >> fdat, line, + fdat.seek(0) + + # Now the file can either be tab-delimited or fixed width. + # If ANY tabs are found in the sanitized lines, then it is taken to be + # a tab-delimited file. + have_tabs = any(['\t' in line for line in fdat.readlines()]) ; fdat.seek(0) + if have_tabs: + return TAB_Parser(fnm) + else: + return FIX_Parser(fnm) + return + class Thermo(Target): """ A target for fitting general experimental data sets. The - experimental data is described in a .txt file and is handled with a - `Quantity` subclass. + source data is described in a .txt file. """ def __init__(self, options, tgt_opts, forcefield): @@ -27,8 +299,8 @@ def __init__(self, options, tgt_opts, forcefield): super(Thermo, self).__init__(options, tgt_opts, forcefield) ## Parameters - # Reference experimental data - self.set_option(tgt_opts, "expdata_txt", forceprint=True) + # Source data (experimental data, model parameters and weights) + self.set_option(tgt_opts, "source", forceprint=True) # Quantities to calculate self.set_option(tgt_opts, "quantities", forceprint=True) # Length of simulation chain @@ -48,31 +320,68 @@ def __init__(self, options, tgt_opts, forcefield): # Weights for quantities self.weights = {} - ## Read experimental data and initialize points - self._read_expdata(os.path.join(self.root, - self.tgtdir, - self.expdata_txt)) + ## Read source data and initialize points + self.read_source(os.path.join(self.root, self.tgtdir, self.source)) ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - def _read_expdata(self, expdata): - """Read and store experimental data. + def read_source(self, source): + """Read and store source data. Parameters ---------- - expdata : string - Read experimental data from this filename. + source : string + Read source data from this filename. 
Returns ------- Nothing """ - fp = open(expdata) + + parser = parse1(source) + print parser.headings + printcool_dictionary(parser.metadata, title="Metadata") + # print parser.table + revised_headings = [] + obs = '' + def error_left(i): + logger.error('Encountered heading %s but there is no observable to the left\n' % i) + raise RuntimeError + + for head in parser.headings: + usplit = re.split(' *\(', head, maxsplit=1) + if len(usplit) > 1: + hfirst = usplit[0] + punit = re.sub('\)$','',usplit[1].strip()) + print "header", head, "split into", hfirst, ",", punit + else: + hfirst = head + punit = '' + newh = hfirst + if head.lower() in ['w', 'wt', 'wts']: + if obs == '': error_left(head) + newh = obs + '_' + hfirst + elif head.lower() in ['s', 'sig', 'sigma']: + if obs == '': error_left(head) + newh = obs + '_' + hfirst + elif head.lower() in ['idx']: + if obs == '': error_left(head) + newh = obs + '_' + hfirst + else: + obs = hfirst + if newh != hfirst: + print "header", head, "renamed to", newh + + raw_input() + + return + fp = open(expdata) + line = fp.readline() foundHeader = False names = None @@ -80,32 +389,32 @@ def _read_expdata(self, expdata): label_header = None label_unit = None count = 0 + metadata = {} while line: # Skip comments and blank lines if line.lstrip().startswith("#") or not line.strip(): line = fp.readline() continue - + # Metadata is denoted using if "=" in line: # Read variable param, value = line.split("=") param = param.strip().lower() - if param == "denoms": - for e, v in enumerate(value.split()): - self.denoms[self.quantities[e]] = float(v) - elif param == "weights": - for e, v in enumerate(value.split()): - self.weights[self.quantities[e]] = float(v) + metadata[param] = value + # if param == "denoms": + # for e, v in enumerate(value.split()): + # self.denoms[self.quantities[e]] = float(v) + # elif param == "weights": + # for e, v in enumerate(value.split()): + # self.weights[self.quantities[e]] = float(v) elif foundHeader: # Read exp 
data count += 1 vals = line.split() - label = (vals[0], label_header, label_unit) refs = np.array(vals[1:-2:2]).astype(float) wts = np.array(vals[2:-2:2]).astype(float) temperature = float(vals[-2]) pressure = None if vals[-1].lower() == "none" else \ float(vals[-1]) - dp = Point(count, label=label, refs=refs, weights=wts, names=names, units=units, temperature=temperature, pressure=pressure) @@ -114,12 +423,10 @@ def _read_expdata(self, expdata): foundHeader = True headers = zip(*[tuple(h.split("_")) for h in line.split() if h != "w"]) - label_header = list(headers[0])[0] label_unit = list(headers[1])[0] names = list(headers[0][1:-2]) units = list(headers[1][1:-2]) - line = fp.readline() def retrieve(self, dp): @@ -214,6 +521,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): def indicate(self): """Shows optimization state.""" + return AGrad = hasattr(self, 'Gp') PrintDict = OrderedDict() @@ -358,6 +666,7 @@ def get(self, mvals, AGrad=True, AHess=True): Objective = 0.0 Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) + return { "X": Objective, "G": Gradient, "H": Hessian} for pt in self.points: # Update data point with MD results diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt new file mode 100644 index 000000000..8ba35c2d9 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt @@ -0,0 +1,5 @@ +T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt 
b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt new file mode 100644 index 000000000..57c1cfa5b --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.198144 C36 0.198144 +C18 0.198128 C37 0.198128 +C19 0.198111 C38 0.198111 +C20 0.198095 C39 0.198095 +C21 0.198079 C40 0.198079 +C22 0.197799 C41 0.197537 +C23 0.198045 C42 0.198046 +C24 0.178844 C43 0.178844 +C25 0.167527 C44 0.178565 +C26 0.148851 C45 0.16751 +C27 0.134117 C46 0.148834 +C28 0.119646 C47 0.1341 +C29 0.100969 C48 0.110956 +C30 0.07546 C49 0.087549 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 
+C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt new file mode 100644 index 000000000..31434af01 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.162535 C36 0.162535 +C18 0.162817 C37 0.162817 +C19 0.162535 C38 0.162535 +C20 0.162535 C39 0.162535 +C21 0.162817 C40 0.162817 +C22 0.151268 C41 0.151268 +C23 0.142254 C42 0.142254 +C24 0.127606 C43 0.127606 +C25 0.117465 C44 0.117465 +C26 0.101972 C45 0.117183 +C27 0.092676 C46 0.102535 +C28 0.081408 C47 0.092676 +C29 0.068732 C48 0.073239 +C30 0.051267 C49 0.056901 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt new file mode 100644 index 000000000..c26cf23d5 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt @@ -0,0 +1,68 @@ +metadata = 'Mao' + + T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 
0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 + diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt new file mode 100644 index 000000000..f2bbb57e1 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 
C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt new file mode 100644 index 000000000..0ec75d7af --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt @@ -0,0 +1,65 @@ +T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 
0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt index 8f211bdaa..3b653f242 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt @@ -1,7 +1,7 @@ # Experimental data for liquid bromine. 
- Temp_K Density_kg/m^3 w Enthalpy_kJ/mol w Temperature_K Pressure_bar - 298.15 3102.8 1.0 29.96 1.0 298.15 1.01325 + Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w + 298.15 1.01325 3102.8 1.0 29.96 1.0 # Variables: Denominators and weights for quantities Denoms = 30 0.3 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv new file mode 100644 index 000000000..847381612 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv @@ -0,0 +1,8 @@ +"# Experimental data for liquid, bromine.",,,,, +,,,,, +Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) +298.15,3102.8,1,29.96,1,1.01325 +,,,,, +# Variables: Denominators and weights for quantities,,,,, +Denoms,=,30,0.3,, +Weights,=,1.0,1.0,, diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt new file mode 100644 index 000000000..333f48bbb --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt @@ -0,0 +1,8 @@ +"# Experimental data for liquid, bromine." 
+ +Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) +298.15 3102.8 1 29.96 1 1.01325 + +# Variables: Denominators and weights for quantities +Denoms = 30 0.3 +Weights = 1 1 diff --git a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo_liquid_bromine/test_parse.in new file mode 100644 index 000000000..6e1a704df --- /dev/null +++ b/studies/004_thermo_liquid_bromine/test_parse.in @@ -0,0 +1,145 @@ +# ForceBalance input file generated by MakeInputFile.py + +# The octothorpe '#' is a comment symbol + +# Note: If the specified value is 'None' then the option will truly be set to +# None - not the string 'None' + +# Note: 'Section' option types are more complicated and may require you to read +# the documentation + +# Note: Boolean option types require no value, the key being present implies +# 'True' + +$options +# (string) Directory containing force fields, relative to project directory +ffdir forcefield + +# (string) Type of the penalty, L2 or L1 in the optimizer +penalty_type L2 + +# (allcap) The job type, defaults to a single-point evaluation of objective +# function +jobtype newton + +# (list) The names of force fields, corresponding to directory +# forcefields/file_name.(itp|gen) +forcefield bro.itp + +# (int) Maximum number of steps in an optimization +maxstep 100 + +# (float) Convergence criterion of step size (just needs to fall below this +# threshold) +convergence_step 0.05 + +# (float) Convergence criterion of objective function (in MainOptimizer this is +# the stdev of x2 over 10 steps) +convergence_objective 0.5 + +# (float) Convergence criterion of gradient norm +convergence_gradient 0.01 + +# (float) Minimum eigenvalue for applying steepest descent correction in the +# MainOptimizer +eig_lowerbound 0.01 + +# (float) Step size for finite difference derivatives in many functions +# (get_(G/H) in fitsim, FDCheckG) +finite_difference_h 0.001 + +# (float) Factor for multiplicative penalty function in objective function 
+penalty_additive 1.0 + +trust0 1.0 +mintrust 0.05 +error_tolerance 1.0 +adaptive_factor 0.2 +adaptive_damping 1.0 +normalize_weights no +print_hessian + +# Charge constraints are taken care of using "evals". +constrain_charge false +verbose_options false +backup false + +$end + +$target +name LiquidBromine +type Thermo_GMX +weight 1.0 +expdata_txt expset.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name LiquidBromine_CSV +type Thermo_GMX +weight 1.0 +expdata_txt data.csv +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name LiquidBromine_TAB +type Thermo_GMX +weight 1.0 +expdata_txt data.tab.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_SPC +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_RIT +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_TAB +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_MUL +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol2a.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + From 902264c506bbbd242dfeafce454a73afcfff20fe Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Sun, 6 Apr 2014 09:37:25 -0700 Subject: [PATCH 03/25] Fix up exception handling --- src/PDB.py | 125 +++++++++------- src/abinitio.py | 27 ++-- src/binding.py | 6 +- src/contact.py | 18 ++- src/forcefield.py | 23 ++- src/gmxio.py | 27 ++-- src/interaction.py | 9 +- src/lipid.py | 30 ++-- src/liquid.py | 51 ++++--- src/molecule.py | 140 ++++++++++++------ src/moments.py | 3 +- src/nifty.py | 35 +++-- src/objective.py | 6 +- src/openmmio.py | 40 +++-- src/optimizer.py | 4 
+- src/output.py | 1 + src/parser.py | 6 +- src/quantity.py | 5 +- src/target.py | 36 +++-- src/thermo.py | 96 +++++++++--- src/tinkerio.py | 31 ++-- src/vibration.py | 9 +- .../targets/LiquidBromine/expset.txt | 4 +- .../004_thermo_liquid_bromine/test_parse.in | 14 +- 24 files changed, 489 insertions(+), 257 deletions(-) diff --git a/src/PDB.py b/src/PDB.py index 4e4564a6d..2d2e77e46 100644 --- a/src/PDB.py +++ b/src/PDB.py @@ -52,6 +52,10 @@ import copy ### PC import numpy as np +import forcebalance +from forcebalance.output import * +logger = getLogger(__name__) + class END: """ END class @@ -116,7 +120,7 @@ def __init__(self, line): self.numTer = toInt(string.strip(line[55:60])) self.numConect = toInt(string.strip(line[60:65])) self.numSeq = toInt(string.strip(line[65:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CONECT: @@ -172,7 +176,7 @@ def __init__(self, line): except ValueError: self.serial9 = None try: self.serial10 = toInt(string.strip(line[56:61])) except ValueError: self.serial10 = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ENDMDL: """ ENDMDL class @@ -219,7 +223,7 @@ def __init__(self, line): self.chainID = None self.resSeq = None self.iCode = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SIGUIJ: """ SIGUIJ class @@ -268,7 +272,7 @@ def __init__(self, line): self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ANISOU: @@ -318,7 +322,7 @@ def __init__(self, line): self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SIGATM: """ SIGATM class @@ -369,7 +373,7 @@ def __init__(self, line): 
self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HETATM: """ HETATM class @@ -438,7 +442,7 @@ def __init__(self,line,sybylType="A.aaa",lBonds=[],lBondedAtoms=[]): ### PC self.segID = "" self.element = "" self.charge = "" - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError def __str__(self): """ @@ -558,9 +562,11 @@ def read(self,file): # Do some error checking if start == -1: - raise Exception, "Unable to find '@ATOM' in MOL2 file!" + logger.error("Unable to find '@ATOM' in MOL2 file!\n") + raise RuntimeError elif stop == -1: - raise Exception, "Unable to find '@BOND' in MOL2 file!" + logger.error("Unable to find '@BOND' in MOL2 file!\n") + raise RuntimeError atoms = data[start+14:stop-2].split("\n") # BOND section @@ -569,7 +575,8 @@ def read(self,file): # More error checking if stop == -1: - raise Exception, "Unable to find '@SUBSTRUCTURE' in MOL2 file!" 
+ logger.error("Unable to find '@SUBSTRUCTURE' in MOL2 file!\n") + raise RuntimeError bonds = data[start+14:stop-1].split("\n") self.parseAtoms(atoms) @@ -586,7 +593,8 @@ def parseAtoms(self,AtomList): # Error checking if len(SeparatedAtomLine) < 8: - raise Exception, "Bad atom entry in MOL2 file: %s" % AtomLine + logger.error("Bad atom entry in MOL2 file: %s\n" % AtomLine) + raise RuntimeError fakeRecord = "HETATM" fakeChain = " L" @@ -598,7 +606,8 @@ def parseAtoms(self,AtomList): float(SeparatedAtomLine[2]),float(SeparatedAtomLine[3]), float(SeparatedAtomLine[4])) except ValueError: - raise Exception, "Bad atom entry in MOL2 file: %s" % AtomLine + logger.error("Bad atom entry in MOL2 file: %s\n" % AtomLine) + raise RuntimeError thisAtom = HETATM(mol2pdb, SeparatedAtomLine[5],[],[]) self.lPDBAtoms.append(mol2pdb) @@ -611,7 +620,8 @@ def parseBonds(self,BondList): for BondLine in BondList: SeparatedBondLine = BondLine.split() if len(SeparatedBondLine) < 4: - raise Exception, "Bad bond entry in MOL2 file: %s" % BondLine + logger.error("Bad bond entry in MOL2 file: %s\n" % BondLine) + raise RuntimeError try: thisBond = MOL2BOND( toInt(SeparatedBondLine[1]), # bond frm @@ -620,7 +630,8 @@ def parseBonds(self,BondList): toInt(SeparatedBondLine[0]) # bond id ) except ValueError: - raise Exception, "Bad bond entry in MOL2 file: %s" % BondLine + logger.error("Bad bond entry in MOL2 file: %s\n" % BondLine) + raise RuntimeError self.lBonds.append(thisBond) def createlBondedAtoms(self): @@ -714,7 +725,7 @@ def __init__(self, line): self.element = "" self.charge = "" else: - raise ValueError, record + logger.error(record+'\n') ; raise ValueError def __str__(self): """ @@ -807,7 +818,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "MODEL": self.serial = toInt(string.strip(line[10:14])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class TVECT: """ TVECT class @@ -835,7 +846,7 @@ def __init__(self, line): 
self.t2 = float(string.strip(line[20:30])) self.t3 = float(string.strip(line[30:40])) self.text = string.strip(line[40:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MTRIX3: """ MTRIX3 class @@ -868,7 +879,7 @@ def __init__(self, line): self.mn3 = float(string.strip(line[30:40])) self.vn = float(string.strip(line[45:55])) self.iGiven = toInt(string.strip(line[59])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MTRIX2: """ MTRIX2 class @@ -901,7 +912,7 @@ def __init__(self, line): self.mn3 = float(string.strip(line[30:40])) self.vn = float(string.strip(line[45:55])) self.iGiven = toInt(string.strip(line[59])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MTRIX1: """ MTRIX1 class @@ -936,7 +947,7 @@ def __init__(self, line): try: self.iGiven = toInt(string.strip(line[45:55])) except ValueError: self.iGiven = None except IndexError: self.iGiven = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SCALE3: """ SCALE3 class @@ -964,7 +975,7 @@ def __init__(self, line): self.sn2 = float(string.strip(line[20:30])) self.sn3 = float(string.strip(line[30:40])) self.un = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SCALE2: """ SCALE2 class @@ -992,7 +1003,7 @@ def __init__(self, line): self.sn2 = float(string.strip(line[20:30])) self.sn3 = float(string.strip(line[30:40])) self.un = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SCALE1: """ SCALE1 class @@ -1020,7 +1031,7 @@ def __init__(self, line): self.sn2 = float(string.strip(line[20:30])) self.sn3 = float(string.strip(line[30:40])) self.un = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ORIGX2: """ ORIGX2 
class @@ -1047,7 +1058,7 @@ def __init__(self, line): self.on2 = float(string.strip(line[20:30])) self.on3 = float(string.strip(line[30:40])) self.tn = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ORIGX3: """ ORIGX3 class @@ -1074,7 +1085,7 @@ def __init__(self, line): self.on2 = float(string.strip(line[20:30])) self.on3 = float(string.strip(line[30:40])) self.tn = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ORIGX1: """ ORIGX1 class @@ -1101,7 +1112,7 @@ def __init__(self, line): self.on2 = float(string.strip(line[20:30])) self.on3 = float(string.strip(line[30:40])) self.tn = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CRYST1: """ CRYST1 class @@ -1136,7 +1147,7 @@ def __init__(self, line): self.gamma = float(string.strip(line[47:54])) self.sGroup = string.strip(line[55:65]) self.z = toInt(string.strip(line[66:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SITE: @@ -1210,7 +1221,7 @@ def __init__(self, line): self.seq4 = toInt(string.strip(line[56:60])) try: self.iCode4 = string.strip(line[60]) except IndexError: self.iCode4 = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CISPEP: """ CISPEP field @@ -1251,7 +1262,7 @@ def __init__(self, line): self.icode2 = string.strip(line[35]) self.modNum = toInt(string.strip(line[43:46])) self.measure = float(string.strip(line[53:59])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SLTBRG: """ SLTBRG field @@ -1297,7 +1308,7 @@ def __init__(self, line): self.iCode2 = string.strip(line[56]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise 
ValueError class HYDBND: """ HYDBND field @@ -1354,7 +1365,7 @@ def __init__(self, line): self.ICode2 = string.strip(line[58]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class LINK: """ LINK field @@ -1402,7 +1413,7 @@ def __init__(self, line): self.iCode2 = string.strip(line[56]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SSBOND: @@ -1440,7 +1451,7 @@ def __init__(self, line): self.icode2 = string.strip(line[35]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class TURN: """ TURN field @@ -1489,7 +1500,7 @@ def __init__(self, line): self.endSeqNum = toInt(string.strip(line[31:35])) self.endICode = string.strip(line[35]) self.comment = string.strip(line[40:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SHEET: """ SHEET field @@ -1584,7 +1595,7 @@ def __init__(self, line): self.prevChainID = None self.prevResSeq = None self.prevICode = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HELIX: """ HELIX field @@ -1638,7 +1649,7 @@ def __init__(self, line): self.comment = string.strip(line[40:70]) try: self.length = toInt(string.strip(line[71:76])) except ValueError: self.length = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class FORMUL: """ FORMUL field @@ -1664,7 +1675,7 @@ def __init__(self, line): self.hetID = string.strip(line[12:15]) self.asterisk = string.strip(line[19]) self.text = string.strip(line[19:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HETSYN: """ HETSYN field @@ -1687,7 +1698,7 @@ def __init__(self, 
line): if record == "HETSYN": self.hetID = string.strip(line[11:14]) self.hetSynonyms = string.strip(line[15:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HETNAM: """ HETNAM field @@ -1709,7 +1720,7 @@ def __init__(self, line): if record == "HETNAM": self.hetID = string.strip(line[11:14]) self.text = string.strip(line[15:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HET: """ HET field @@ -1749,7 +1760,7 @@ def __init__(self, line): self.iCode = string.strip(line[17]) self.numHetAtoms = toInt(string.strip(line[20:25])) self.text = string.strip(line[30:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MODRES: """ MODRES field @@ -1783,7 +1794,7 @@ def __init__(self, line): string.iCode = string.strip(line[22]) string.stdRes = string.strip(line[24:27]) string.comment = string.strip(line[29:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SEQRES: """ SEQRES field @@ -1840,7 +1851,7 @@ def __init__(self, line): self.resName.append(string.strip(line[59:62])) self.resName.append(string.strip(line[63:66])) self.resName.append(string.strip(line[67:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SEQADV: """ SEQADV field @@ -1886,7 +1897,7 @@ def __init__(self, line): try: self.dbSeq = toInt(string.strip(line[43:48])) except ValueError: self.dbSeq = None self.conflict = string.strip(line[49:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class DBREF: """ DBREF field @@ -1951,7 +1962,7 @@ def __init__(self, line): self.dbseqEnd = toInt(string.strip(line[62:67])) try: self.dbinsEnd = string.strip(line[67]) except IndexError: self.dbinsEnd = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class REMARK: """ REMARK field @@ -2022,7 +2033,7 @@ def 
__init__(self, line): record = string.strip(line[0:6]) if record == "JRNL": self.text = string.strip(line[12:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SPRSDE: """ SPRSDE field @@ -2064,7 +2075,7 @@ def __init__(self, line): self.sIdCodes.append(string.strip(line[56:60])) self.sIdCodes.append(string.strip(line[61:65])) self.sIdCodes.append(string.strip(line[66:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class REVDAT: """ REVDAT field @@ -2106,7 +2117,7 @@ def __init__(self, line): self.records.append(string.strip(line[46:52])) self.records.append(string.strip(line[53:59])) self.records.append(string.strip(line[60:66])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class AUTHOR: """ AUTHOR field @@ -2127,7 +2138,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "AUTHOR": self.authorList = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class EXPDTA: """ EXPDTA field @@ -2158,7 +2169,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "EXPDTA": self.technique = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class KEYWDS: """ KEYWDS field @@ -2183,7 +2194,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "KEYWDS": self.keywds = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SOURCE: """ SOURCE field @@ -2207,7 +2218,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "SOURCE": self.source = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class COMPND: @@ -2237,7 +2248,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "COMPND": self.compound = 
string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CAVEAT: """ CAVEAT field @@ -2260,7 +2271,7 @@ def __init__(self, line): if record == "CAVEAT": self.idCode = string.strip(line[11:15]) self.comment = string.strip(line[19:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class TITLE: """ TITLE field @@ -2281,7 +2292,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "TITLE": self.title = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class OBSLTE: """ OBSLTE field @@ -2332,7 +2343,7 @@ def __init__(self, line): self.rIdCodes.append(string.strip(line[56:60])) self.rIdCodes.append(string.strip(line[61:65])) self.rIdCodes.append(string.strip(line[67:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HEADER: """ HEADER field @@ -2359,7 +2370,7 @@ def __init__(self, line): self.classification = string.strip(line[10:50]) self.depDate = string.strip(line[50:59]) self.IDcode = string.strip(line[62:66]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError def readAtom(line): """ @@ -2392,7 +2403,7 @@ def readAtom(line): self.segID = 0 self.element = 0 self.charge = 0 - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError """ # Try to find 5 consecutive floats diff --git a/src/abinitio.py b/src/abinitio.py index c30a8af9a..b6844cd51 100644 --- a/src/abinitio.py +++ b/src/abinitio.py @@ -210,7 +210,8 @@ def compute_netforce_torque(self, xyz, force, QM=False): elif self.force_map == 'chargegroup' and 'ChargeGroupNumber' in self.AtomLists: Block = self.AtomLists['ChargeGroupNumber'] else: - raise Exception('force_map keyword "%s" is invalid. 
Please choose from: %s' % (self.force_map, ', '.join(['"%s"' % kwds[k] for k in self.AtomLists.keys() if k in kwds]))) + logger.error('force_map keyword "%s" is invalid. Please choose from: %s\n' % (self.force_map, ', '.join(['"%s"' % kwds[k] for k in self.AtomLists.keys() if k in kwds]))) + raise RuntimeError nft = self.fitatoms # Number of particles that the force is acting on @@ -225,14 +226,16 @@ def compute_netforce_torque(self, xyz, force, QM=False): mask = np.array([i for i in range(npr) if self.AtomMask[i]]) if nfp not in [npr, nat]: - raise RuntimeError('Force contains %i particles but expected %i or %i' % (nfp, npr, nat)) + logger.error('Force contains %i particles but expected %i or %i\n' % (nfp, npr, nat)) + raise RuntimeError elif nfp == nat: frc1 = force.reshape(-1,3)[:nft].flatten() elif nfp == npr: frc1 = force.reshape(-1,3)[mask][:nft].flatten() if nxp not in [npr, nat]: - raise RuntimeError('Coordinates contains %i particles but expected %i or %i' % (nfp, npr, nat)) + logger.error('Coordinates contains %i particles but expected %i or %i\n' % (nfp, npr, nat)) + raise RuntimeError elif nxp == nat: xyz1 = xyz[:nft] elif nxp == npr: @@ -377,7 +380,8 @@ def read_reference_data(self): self.emd0 -= np.mean(self.emd0) if self.whamboltz == True: if self.attenuate: - raise RuntimeError('whamboltz and attenuate are mutually exclusive') + logger.error('whamboltz and attenuate are mutually exclusive\n') + raise RuntimeError self.boltz_wts = np.array([float(i.strip()) for i in open(os.path.join(self.root,self.tgtdir,"wham-weights.txt")).readlines()]) # This is a constant pre-multiplier in front of every snapshot. 
bar = printcool("Using WHAM MM Boltzmann weights.", color=4) @@ -469,13 +473,15 @@ def energy_all(self): if hasattr(self, 'engine'): return self.engine.energy().reshape(-1,1) else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_all(self): if hasattr(self, 'engine'): return self.engine.energy_force() else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_transform(self): if self.force: @@ -498,13 +504,15 @@ def energy_one(self, i): if hasattr(self, 'engine'): return self.engine.energy_one(i) else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_one(self, i): if hasattr(self, 'engine'): return self.engine.energy_force_one(i) else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_transform_one(self,i): if self.force: @@ -1165,7 +1173,8 @@ def get(self, mvals, AGrad=False, AHess=False): for i in Answer_ESP: Answer[i] += w_resp * Answer_ESP[i] if not any([self.energy, self.force, self.resp]): - raise Exception("Ab initio fitting must have at least one of: Energy, Force, ESP") + logger.error("Ab initio fitting must have at least one of: Energy, Force, ESP\n") + raise RuntimeError if not in_fd(): self.objective = Answer['X'] return Answer diff --git a/src/binding.py b/src/binding.py index 7c0e9bb40..6bb974f4f 100644 --- a/src/binding.py +++ b/src/binding.py @@ -133,7 +133,8 @@ def __init__(self,options,tgt_opts,forcefield): self.inter_opts[inter][opt] = self.global_opts[opt] for inter in self.inter_opts: if 'energy_unit' in self.inter_opts[inter] and 
self.inter_opts[inter]['energy_unit'].lower() not in ['kilocalorie_per_mole', 'kilocalories_per_mole']: - raise RuntimeError('Usage of physical units is has been removed, please provide all binding energies in kcal/mole') + logger.error('Usage of physical units is has been removed, please provide all binding energies in kcal/mole\n') + raise RuntimeError self.inter_opts[inter]['reference_physical'] = self.inter_opts[inter]['energy'] if tgt_opts['energy_denom'] == 0.0: @@ -152,7 +153,8 @@ def __init__(self,options,tgt_opts,forcefield): if self.cauchy: logger.info("Each contribution to the interaction energy objective function will be scaled by 1.0 / ( denom**2 + reference**2 )\n") if self.attenuate: - raise Exception('attenuate and cauchy are mutually exclusive') + logger.error('attenuate and cauchy are mutually exclusive\n') + raise RuntimeError elif self.attenuate: logger.info("Repulsive interactions beyond energy_denom will be scaled by 1.0 / ( denom**2 + (reference-denom)**2 )\n") ## Build keyword dictionaries to pass to engine. 
diff --git a/src/contact.py b/src/contact.py index 6ca64e1a5..bb1f4eb68 100644 --- a/src/contact.py +++ b/src/contact.py @@ -29,15 +29,18 @@ def atom_distances(xyzlist, atom_contacts, box=None): # check shapes traj_length, num_atoms, num_dims = xyzlist.shape if not num_dims == 3: - raise ValueError("xyzlist must be an n x m x 3 array") + logger.error("xyzlist must be an n x m x 3 array\n") + raise ValueError try: num_contacts, width = atom_contacts.shape assert width is 2 except (AttributeError, ValueError, AssertionError): - raise ValueError('contacts must be an n x 2 array') + logger.error('contacts must be an n x 2 array\n') + raise ValueError if not np.all(np.unique(atom_contacts) < num_atoms): - raise ValueError('Atom contacts goes larger than num_atoms') + logger.error('Atom contacts goes larger than num_atoms\n') + raise ValueError # check type if xyzlist.dtype != np.float32: @@ -59,7 +62,8 @@ def atom_distances(xyzlist, atom_contacts, box=None): _contact_wrap.atomic_contact_wrap(xyzlist, atom_contacts, results) else: if box.shape != (3,): - raise ValueError('box must be a 3-element array') + logger.error('box must be a 3-element array\n') + raise ValueError if box.dtype != np.float32: box = np.float32(box) # make sure contiguous @@ -97,12 +101,14 @@ def residue_distances(xyzlist, residue_membership, residue_contacts): traj_length, num_atoms, num_dims = xyzlist.shape if not num_dims == 3: - raise ValueError("xyzlist must be n x m x 3") + logger.error("xyzlist must be n x m x 3\n") + raise ValueError try: num_contacts, width = residue_contacts.shape assert width is 2 except (AttributeError, ValueError, AssertionError): - raise ValueError('residue_contacts must be an n x 2 array') + logger.error('residue_contacts must be an n x 2 array\n') + raise ValueError # check type if xyzlist.dtype != np.float32: diff --git a/src/forcefield.py b/src/forcefield.py index b49c1b46d..7db442a39 100644 --- a/src/forcefield.py +++ b/src/forcefield.py @@ -179,7 +179,8 @@ def 
__missing__(self, key): try: return self.backup_dict[self['AtomType']][key] except: - raise KeyError('The key %s does not exist as an atom attribute or as an atom type attribute!' % key) + logger.error('The key %s does not exist as an atom attribute or as an atom type attribute!\n' % key) + raise KeyError class FF(forcebalance.BaseClass): """ Force field class. @@ -477,7 +478,7 @@ def addff_txt(self, ffname, fftype): for k in kwds: if sline.count(k) > 1: logger.error(line) - logger.error("The above line contains multiple occurrences of the keyword %s" % k) + logger.error("The above line contains multiple occurrences of the keyword %s\n" % k) raise RuntimeError elif sline.count(k) == 1: marks[k] = (np.array(sline) == k).argmax() @@ -531,7 +532,8 @@ def addff_txt(self, ffname, fftype): count += 1 sys.stderr.write("\nOffending ID: %s\n" % sline[parse+1]) - raise Exception('Parameter repetition entry in force field file is incorrect (see above)') + logger.error('Parameter repetition entry in force field file is incorrect (see above)\n') + raise RuntimeError pid = self.Readers[ffname].build_pid(pfld) self.map[pid] = prep # This repeated parameter ID also has these atoms involved. 
@@ -626,9 +628,11 @@ def make(self,vals=None,use_pvals=False,printdir=None,precision=12): """ if type(vals)==np.ndarray and vals.ndim != 1: - raise Exception('Please only pass 1-D arrays') + logger.error('Please only pass 1-D arrays\n') + raise RuntimeError if len(vals) != self.np: - raise Exception('Input parameter np.array (%i) not the required size (%i)' % (len(vals), self.np)) + logger.error('Input parameter np.array (%i) not the required size (%i)\n' % (len(vals), self.np)) + raise RuntimeError if use_pvals or self.use_pvals: logger.info("Using physical parameters directly!\r") pvals = vals.copy().flatten() @@ -684,7 +688,8 @@ def TXTFormat(number, precision): wval = eval(cmd.replace("PARM","PRM")) except: logger.error(traceback.format_exc() + '\n') - raise Exception("The command %s (written in the force field file) cannot be evaluated!" % cmd) + logger.error("The command %s (written in the force field file) cannot be evaluated!\n" % cmd) + raise RuntimeError else: wval = mult*pvals[i] if self.ffdata_isxml[fnm]: @@ -833,7 +838,8 @@ def create_pvals(self,mvals): pvals = np.exp(mvals.flatten()) * self.pvals0 except: logger.exception(mvals + '\n') - raise Exception('What the hell did you do?') + logger.error('What the hell did you do?\n') + raise RuntimeError else: pvals = flat(np.matrix(self.tmI)*col(mvals)) + self.pvals0 concern= ['polarizability','epsilon','VDWT'] @@ -859,7 +865,8 @@ def create_mvals(self,pvals): @return mvals The mathematical parameters """ if self.logarithmic_map: - raise Exception('create_mvals has not been implemented for logarithmic_map') + logger.error('create_mvals has not been implemented for logarithmic_map\n') + raise RuntimeError mvals = flat(invert_svd(self.tmI) * col(pvals - self.pvals0)) return mvals diff --git a/src/gmxio.py b/src/gmxio.py index 04e678a73..273438ada 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -73,7 +73,8 @@ def write_mdp(fout, options, fin=None, defaults={}, verbose=False): val = options[key] val0 = 
valf.strip() if key in clashes and val != val0: - raise RuntimeError("write_mdp tried to set %s = %s but its original value was %s = %s" % (key, val, key, val0)) + logger.error("write_mdp tried to set %s = %s but its original value was %s = %s\n" % (key, val, key, val0)) + raise RuntimeError # Passing None as the value causes the option to be deleted if val == None: continue if len(val) < len(valf): @@ -518,7 +519,8 @@ def setopts(self, **kwargs): warn_once("The 'gmxpath' option was not specified; using default.") if which('mdrun'+self.gmxsuffix) == '': warn_press_key("Please add GROMACS executables to the PATH or specify gmxpath.") - raise RuntimeError("Cannot find the GROMACS executables!") + logger.error("Cannot find the GROMACS executables!\n") + raise RuntimeError else: self.gmxpath = which('mdrun'+self.gmxsuffix) havegmx = True @@ -629,7 +631,8 @@ def prepare(self, pbc=False, **kwargs): if self.top != None and os.path.exists(self.top): LinkFile(self.top, '%s.top' % self.name) else: - raise RuntimeError("No .top file found, cannot continue.") + logger.error("No .top file found, cannot continue.\n") + raise RuntimeError write_mdp("%s.mdp" % self.name, gmx_opts, fin=self.mdp, defaults=self.gmx_defs) ## Call grompp followed by gmxdump to read the trajectory @@ -678,13 +681,15 @@ def links(self): if topfile != None: LinkFile(topfile, "%s.top" % self.name) else: - raise RuntimeError("No .top file found, cannot continue.") + logger.error("No .top file found, cannot continue.\n") + raise RuntimeError if not os.path.exists('%s.mdp' % self.name): mdpfile = onefile('mdp') if mdpfile != None: LinkFile(mdpfile, "%s.mdp" % self.name, nosrcok=True) else: - raise RuntimeError("No .mdp file found, cannot continue.") + logger.error("No .mdp file found, cannot continue.\n") + raise RuntimeError def callgmx(self, command, stdin=None, print_to_screen=False, print_command=False, **kwargs): @@ -738,7 +743,8 @@ def warngmx(self, command, warnings=[], maxwarn=1, **kwargs): elif 
fatal: for line in o: logger.error(line+'\n') - raise RuntimeError('grompp encountered a fatal error!') + logger.error('grompp encountered a fatal error!\n') + raise RuntimeError return o def energy_termnames(self, edrfile=None): @@ -748,7 +754,8 @@ def energy_termnames(self, edrfile=None): if edrfile == None: edrfile = "%s.edr" % self.name if not os.path.exists(edrfile): - raise RuntimeError('Cannot determine energy term names without an .edr file') + logger.error('Cannot determine energy term names without an .edr file\n') + raise RuntimeError ## Figure out which energy terms need to be printed. o = self.callgmx("g_energy -f %s -xvg no" % (edrfile), stdin="Total-Energy\n", copy_stdout=False, copy_stderr=True) parsemode = 0 @@ -1352,7 +1359,8 @@ def __init__(self,options,tgt_opts,forcefield): # Error checking. for i in self.nptfiles: if not os.path.exists(os.path.join(self.root, self.tgtdir, i)): - raise RuntimeError('Please provide %s; it is needed to proceed.' % i) + logger.error('Please provide %s; it is needed to proceed.\n' % i) + raise RuntimeError # Send back last frame of production trajectory. self.extra_output = ['liquid-md.gro'] # Send back the trajectory file. @@ -1402,7 +1410,8 @@ def __init__(self,options,tgt_opts,forcefield): # Error checking. for i in self.nptfiles: if not os.path.exists(os.path.join(self.root, self.tgtdir, i)): - raise RuntimeError('Please provide %s; it is needed to proceed.' % i) + logger.error('Please provide %s; it is needed to proceed.\n' % i) + raise RuntimeError # Send back last frame of production trajectory. self.extra_output = ['lipid-md.gro'] # Send back the trajectory file. 
diff --git a/src/interaction.py b/src/interaction.py index f82aed455..7ae3052db 100644 --- a/src/interaction.py +++ b/src/interaction.py @@ -56,12 +56,14 @@ def __init__(self,options,tgt_opts,forcefield): ## Set fragment 1 self.set_option(tgt_opts,'fragment1','fragment1') if len(self.fragment1) == 0: - raise Exception('You need to define the first fragment using the fragment1 keyword') + logger.error('You need to define the first fragment using the fragment1 keyword\n') + raise RuntimeError self.select1 = np.array(uncommadash(self.fragment1)) ## Set fragment 2 self.set_option(tgt_opts,'fragment2','fragment2') if len(self.fragment2) == 0: - raise Exception('You need to define the second fragment using the fragment2 keyword') + logger.error('You need to define the second fragment using the fragment2 keyword\n') + raise RuntimeError self.select2 = np.array(uncommadash(self.fragment2)) ## Set upper cutoff energy self.set_option(tgt_opts,'energy_upper','energy_upper') @@ -94,7 +96,8 @@ def __init__(self,options,tgt_opts,forcefield): if self.cauchy: self.divisor = np.sqrt(self.eqm**2 + denom**2) if self.attenuate: - raise Exception('attenuate and cauchy are mutually exclusive') + logger.error('attenuate and cauchy are mutually exclusive\n') + raise RuntimeError elif self.attenuate: # Attenuate only large repulsions. self.divisor = np.zeros(len(self.eqm)) diff --git a/src/lipid.py b/src/lipid.py index dc73bcacf..8a8f9bfee 100644 --- a/src/lipid.py +++ b/src/lipid.py @@ -102,7 +102,8 @@ def __init__(self,options,tgt_opts,forcefield): #======================================# # Read in lipid starting coordinates. 
if not os.path.exists(os.path.join(self.root, self.tgtdir, self.lipid_coords)): - raise RuntimeError("%s doesn't exist; please provide lipid_coords option" % self.lipid_coords) + logger.error("%s doesn't exist; please provide lipid_coords option\n" % self.lipid_coords) + raise RuntimeError self.lipid_mol = Molecule(os.path.join(self.root, self.tgtdir, self.lipid_coords)) # List of trajectory files that may be deleted if self.save_traj == 1. self.last_traj = [] @@ -174,11 +175,14 @@ def read_data(self): found_headings = True headings = line if len(set(headings)) != len(headings): - raise Exception('Column headings in data.csv must be unique') + logger.error('Column headings in data.csv must be unique\n') + raise RuntimeError if 'p' not in headings: - raise Exception('There must be a pressure column heading labeled by "p" in data.csv') + logger.error('There must be a pressure column heading labeled by "p" in data.csv\n') + raise RuntimeError if 't' not in headings: - raise Exception('There must be a temperature column heading labeled by "t" in data.csv') + logger.error('There must be a temperature column heading labeled by "t" in data.csv\n') + raise RuntimeError elif found_headings: try: # Temperatures are in kelvin. @@ -188,7 +192,8 @@ def read_data(self): punit = [val.split()[1] if len(val.split()) >= 1 else "atm" for head, val in zip(headings,line) if head == 'p'][0] unrec = set([punit]).difference(['atm','bar']) if len(unrec) > 0: - raise Exception('The pressure unit %s is not recognized, please use bar or atm' % unrec[0]) + logger.error('The pressure unit %s is not recognized, please use bar or atm\n' % unrec[0]) + raise RuntimeError # This line actually reads the reference data and inserts it into the RefData dictionary of dictionaries. 
for head, val in zip(headings,line): if head == 't' or head == 'p' : continue @@ -202,17 +207,20 @@ def read_data(self): self.RefData.setdefault(head,OrderedDict([]))[(t,pval,punit)] = np.array(map(float, val.split())) except: logger.error(line + '\n') - raise Exception('Encountered an error reading this line!') + logger.error('Encountered an error reading this line!\n') + raise RuntimeError else: logger.error(line + '\n') - raise Exception('I did not recognize this line!') + logger.error('I did not recognize this line!\n') + raise RuntimeError # Check the reference data table for validity. default_denoms = defaultdict(int) PhasePoints = None for head in self.RefData: if head not in known_vars+[i+"_wt" for i in known_vars]: # Only hard-coded properties may be recognized. - raise Exception("The column heading %s is not recognized in data.csv" % head) + logger.error("The column heading %s is not recognized in data.csv\n" % head) + raise RuntimeError if head in known_vars: if head+"_wt" not in self.RefData: # If the phase-point weights are not specified in the reference data file, initialize them all to one. 
@@ -500,7 +508,8 @@ def get(self, mvals, AGrad=True, AHess=True): # for obs in self.RefData: # del self.RefData[obs][PT] if len(Points) == 0: - raise Exception('The lipid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!') + logger.error('The lipid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!\n') + raise RuntimeError # Assign variable names to all the stuff in npt_result.p Rhos, Vols, Potentials, Energies, Dips, Grads, GDips, \ @@ -508,7 +517,8 @@ def get(self, mvals, AGrad=True, AHess=True): # Determine the number of molecules if len(set(NMols)) != 1: logger.error(str(NMols)) - raise Exception('The above list should only contain one number - the number of molecules') + logger.error('The above list should only contain one number - the number of molecules\n') + raise RuntimeError else: NMol = list(set(NMols))[0] diff --git a/src/liquid.py b/src/liquid.py index e276037d9..d394eb051 100644 --- a/src/liquid.py +++ b/src/liquid.py @@ -104,11 +104,13 @@ def __init__(self,options,tgt_opts,forcefield): #======================================# # Read in liquid starting coordinates. if not os.path.exists(os.path.join(self.root, self.tgtdir, self.liquid_coords)): - raise RuntimeError("%s doesn't exist; please provide liquid_coords option" % self.liquid_coords) + logger.error("%s doesn't exist; please provide liquid_coords option\n" % self.liquid_coords) + raise RuntimeError self.liquid_mol = Molecule(os.path.join(self.root, self.tgtdir, self.liquid_coords)) # Read in gas starting coordinates. 
if not os.path.exists(os.path.join(self.root, self.tgtdir, self.gas_coords)): - raise RuntimeError("%s doesn't exist; please provide gas_coords option" % self.gas_coords) + logger.error("%s doesn't exist; please provide gas_coords option\n" % self.gas_coords) + raise RuntimeError self.gas_mol = Molecule(os.path.join(self.root, self.tgtdir, self.gas_coords)) # List of trajectory files that may be deleted if self.save_traj == 1. self.last_traj = [] @@ -181,11 +183,14 @@ def read_data(self): found_headings = True headings = line if len(set(headings)) != len(headings): - raise Exception('Column headings in data.csv must be unique') + logger.error('Column headings in data.csv must be unique\n') + raise RuntimeError if 'p' not in headings: - raise Exception('There must be a pressure column heading labeled by "p" in data.csv') + logger.error('There must be a pressure column heading labeled by "p" in data.csv\n') + raise RuntimeError if 't' not in headings: - raise Exception('There must be a temperature column heading labeled by "t" in data.csv') + logger.error('There must be a temperature column heading labeled by "t" in data.csv\n') + raise RuntimeError elif found_headings: try: # Temperatures are in kelvin. @@ -195,7 +200,8 @@ def read_data(self): punit = [val.split()[1] if len(val.split()) >= 1 else "atm" for head, val in zip(headings,line) if head == 'p'][0] unrec = set([punit]).difference(['atm','bar']) if len(unrec) > 0: - raise Exception('The pressure unit %s is not recognized, please use bar or atm' % unrec[0]) + logger.error('The pressure unit %s is not recognized, please use bar or atm\n' % unrec[0]) + raise RuntimeError # This line actually reads the reference data and inserts it into the RefData dictionary of dictionaries. 
for head, val in zip(headings,line): if head == 't' or head == 'p' : continue @@ -207,17 +213,20 @@ def read_data(self): self.RefData.setdefault(head,OrderedDict([]))[(t,pval,punit)] = False except: logger.error(line + '\n') - raise Exception('Encountered an error reading this line!') + logger.error('Encountered an error reading this line!\n') + raise RuntimeError else: logger.error(line + '\n') - raise Exception('I did not recognize this line!') + logger.error('I did not recognize this line!\n') + raise RuntimeError # Check the reference data table for validity. default_denoms = defaultdict(int) PhasePoints = None for head in self.RefData: if head not in known_vars+[i+"_wt" for i in known_vars]: # Only hard-coded properties may be recognized. - raise Exception("The column heading %s is not recognized in data.csv" % head) + logger.error("The column heading %s is not recognized in data.csv\n" % head) + raise RuntimeError if head in known_vars: if head+"_wt" not in self.RefData: # If the phase-point weights are not specified in the reference data file, initialize them all to one. 
@@ -470,7 +479,8 @@ def read(self, mvals, AGrad=True, AHess=True): logger.warning('The file ./%s/npt_result.p does not exist so we cannot read it\n' % label) pass if len(Points) == 0: - raise Exception('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!') + logger.error('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!\n') + raise RuntimeError # Assign variable names to all the stuff in npt_result.p Rhos, Vols, Potentials, Energies, Dips, Grads, GDips, mPotentials, mEnergies, mGrads, \ @@ -478,7 +488,8 @@ def read(self, mvals, AGrad=True, AHess=True): # Determine the number of molecules if len(set(NMols)) != 1: logger.error(str(NMols)) - raise Exception('The above list should only contain one number - the number of molecules') + logger.error('The above list should only contain one number - the number of molecules\n') + raise RuntimeError else: NMol = list(set(NMols))[0] @@ -581,7 +592,8 @@ def get(self, mvals, AGrad=True, AHess=True): logger.warning('The file ./%s/npt_result.p does not exist so we cannot read it\n' % label) pass if len(Points) == 0: - raise Exception('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!') + logger.error('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!\n') + raise RuntimeError # Assign variable names to all the stuff in npt_result.p Rhos, Vols, Potentials, Energies, Dips, Grads, GDips, mPotentials, mEnergies, mGrads, \ @@ -589,7 +601,8 @@ def get(self, mvals, AGrad=True, AHess=True): # Determine the number of molecules if len(set(NMols)) != 1: logger.error(str(NMols)) - raise Exception('The above list should only contain one number - the number of molecules') + logger.error('The above list should only contain one number - the number of molecules\n') + raise RuntimeError else: NMol = list(set(NMols))[0] @@ -627,7 +640,8 @@ def get(self, 
mvals, AGrad=True, AHess=True): "Increasing simulation length: %i -> %i steps" % \ (Nrpt, self.liquid_md_steps, sumsteps), color=6) if self.liquid_md_steps * 2 != sumsteps: - raise RuntimeError("Spoo!") + logger.error("Spoo!\n") + raise RuntimeError self.liquid_eq_steps *= 2 self.liquid_md_steps *= 2 self.gas_eq_steps *= 2 @@ -787,17 +801,20 @@ def deprod(vec): Hvap_grad[PT] -= GEPol if hasattr(self,'use_cni') and self.use_cni: if not ('cni' in self.RefData and self.RefData['cni'][PT]): - raise RuntimeError('Asked for a nonideality correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.') + logger.error('Asked for a nonideality correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.\n') + raise RuntimeError logger.debug("Adding % .3f to enthalpy of vaporization at " % self.RefData['cni'][PT] + str(PT) + '\n') Hvap_calc[PT] += self.RefData['cni'][PT] if hasattr(self,'use_cvib_intra') and self.use_cvib_intra: if not ('cvib_intra' in self.RefData and self.RefData['cvib_intra'][PT]): - raise RuntimeError('Asked for a quantum intramolecular vibrational correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.') + logger.error('Asked for a quantum intramolecular vibrational correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.\n') + raise RuntimeError logger.debug("Adding % .3f to enthalpy of vaporization at " % self.RefData['cvib_intra'][PT] + str(PT) + '\n') Hvap_calc[PT] += self.RefData['cvib_intra'][PT] if hasattr(self,'use_cvib_inter') and self.use_cvib_inter: if not ('cvib_inter' in self.RefData and self.RefData['cvib_inter'][PT]): - raise RuntimeError('Asked for a quantum intermolecular vibrational correction but not provided in reference data (data.csv). 
Either disable the option in data.csv or add data.') + logger.error('Asked for a quantum intermolecular vibrational correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.\n') + raise RuntimeError logger.debug("Adding % .3f to enthalpy of vaporization at " % self.RefData['cvib_inter'][PT] + str(PT) + '\n') Hvap_calc[PT] += self.RefData['cvib_inter'][PT] else: diff --git a/src/molecule.py b/src/molecule.py index 99b87e70a..2f3ccbd34 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -246,9 +246,11 @@ def unmangle(M1, M2): M.elem = list(np.array(PDB.elem)[unmangled]) """ if len(M1) != 1 or len(M2) != 1: - raise RuntimeError("Unmangler only deals with length-1 molecule objects") + logger.error("Unmangler only deals with length-1 molecule objects\n") + raise RuntimeError if M1.na != M2.na: - raise RuntimeError("Unmangler only deals with same number of atoms") + logger.error("Unmangler only deals with same number of atoms\n") + raise RuntimeError unmangler = {} for i in range(M1.na): for j in range(M2.na): @@ -256,7 +258,8 @@ def unmangle(M1, M2): unmangler[j] = i unmangled = [unmangler[i] for i in sorted(unmangler.keys())] if len(unmangled) != M1.na: - raise RuntimeError("Unmangler failed (different structures?)") + logger.error("Unmangler failed (different structures?)\n") + raise RuntimeError return unmangled def nodematch(node1,node2): @@ -720,7 +723,7 @@ def __getattr__(self, key): return len(self.xyzs[0]) else: return 0 - #raise Exception('na is ill-defined if the molecule has no AtomKeys member variables.') + #raise RuntimeError('na is ill-defined if the molecule has no AtomKeys member variables.') ## These attributes return a list of attribute names defined in this class that belong in the chosen category. 
## For example: self.FrameKeys should return set(['xyzs','boxes']) if xyzs and boxes exist in self.Data elif key == 'FrameKeys': @@ -761,7 +764,8 @@ def __getitem__(self, key): New.Data[k] = copy.deepcopy(self.Data[k]) return New else: - raise Exception('getitem is not implemented for keys of type %s' % str(key)) + logger.error('getitem is not implemented for keys of type %s\n' % str(key)) + raise RuntimeError def __delitem__(self, key): """ @@ -788,7 +792,8 @@ def __add__(self,other): """ Add method for Molecule objects. """ # Check type of other if not isinstance(other,Molecule): - raise TypeError('A Molecule instance can only be added to another Molecule instance') + logger.error('A Molecule instance can only be added to another Molecule instance\n') + raise TypeError # Create the sum of the two classes by copying the first class. Sum = Molecule() for key in AtomVariableNames | MetaVariableNames: @@ -799,7 +804,8 @@ def __add__(self,other): elif diff(self, other, key): for i, j in zip(self.Data[key], other.Data[key]): print i, j, i==j - raise Exception('The data member called %s is not the same for these two objects' % key) + logger.error('The data member called %s is not the same for these two objects\n' % key) + raise RuntimeError elif key in self.Data: Sum.Data[key] = copy.deepcopy(self.Data[key]) elif key in other.Data: @@ -807,9 +813,11 @@ def __add__(self,other): for key in FrameVariableNames: if both(self, other, key): if type(self.Data[key]) is not list: - raise Exception('Key %s in self is a FrameKey, it must be a list' % key) + logger.error('Key %s in self is a FrameKey, it must be a list\n' % key) + raise RuntimeError if type(other.Data[key]) is not list: - raise Exception('Key %s in other is a FrameKey, it must be a list' % key) + logger.error('Key %s in other is a FrameKey, it must be a list\n' % key) + raise RuntimeError Sum.Data[key] = list(self.Data[key] + other.Data[key]) elif either(self, other, key): # TINKER 6.3 compatibility - catch the 
specific case that one has a periodic box and the other doesn't. @@ -819,21 +827,24 @@ def __add__(self,other): elif key in other.Data: self.Data['boxes'] = [other.Data['boxes'][0] for i in range(len(self))] else: - raise Exception('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).' % key) + logger.error('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key) + raise RuntimeError return Sum def __iadd__(self,other): """ Add method for Molecule objects. """ # Check type of other if not isinstance(other,Molecule): - raise TypeError('A Molecule instance can only be added to another Molecule instance') + logger.error('A Molecule instance can only be added to another Molecule instance\n') + raise TypeError # Create the sum of the two classes by copying the first class. for key in AtomVariableNames | MetaVariableNames: if key in ['fnm', 'ftype', 'bonds']: pass elif diff(self, other, key): for i, j in zip(self.Data[key], other.Data[key]): print i, j, i==j - raise Exception('The data member called %s is not the same for these two objects' % key) + logger.error('The data member called %s is not the same for these two objects\n' % key) + raise RuntimeError # Information from the other class is added to this class (if said info doesn't exist.) 
elif key in other.Data: self.Data[key] = copy.deepcopy(other.Data[key]) @@ -841,9 +852,11 @@ def __iadd__(self,other): for key in FrameVariableNames: if both(self, other, key): if type(self.Data[key]) is not list: - raise Exception('Key %s in self is a FrameKey, it must be a list' % key) + logger.error('Key %s in self is a FrameKey, it must be a list\n' % key) + raise RuntimeError if type(other.Data[key]) is not list: - raise Exception('Key %s in other is a FrameKey, it must be a list' % key) + logger.error('Key %s in other is a FrameKey, it must be a list\n' % key) + raise RuntimeError self.Data[key] += other.Data[key] elif either(self, other, key): # TINKER 6.3 compatibility - catch the specific case that one has a periodic box and the other doesn't. @@ -853,7 +866,8 @@ def __iadd__(self,other): elif key in other.Data: self.Data['boxes'] = [other.Data['boxes'][0] for i in range(len(self))] else: - raise Exception('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).' % key) + logger.error('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key) + raise RuntimeError return self def repair(self, key, klast): @@ -868,9 +882,10 @@ def repair(self, key, klast): # If we only have one box then we can fill in the rest of the trajectory. for i in range(diff): self.Data['boxes'].append(self.Data['boxes'][-1]) else: - raise Exception('The keys %s and %s have different lengths (%i %i)' - '- this isn\'t supposed to happen for two AtomKeys member variables.' \ - % (key, klast, len(self.Data[key]), len(self.Data[klast]))) + logger.error('The keys %s and %s have different lengths (%i %i)' + '- this isn\'t supposed to happen for two AtomKeys member variables.' 
+ % (key, klast, len(self.Data[key]), len(self.Data[klast]))) + raise RuntimeError def reorder_according_to(self, other): @@ -966,7 +981,8 @@ def __init__(self, fnm = None, ftype = None, positive_resid=True, build_topology ## Try to determine from the file name using the extension. ftype = os.path.splitext(fnm)[1][1:] if not os.path.exists(fnm): - raise IOError('Tried to create Molecule object from a file that does not exist: %s' % fnm) + logger.error('Tried to create Molecule object from a file that does not exist: %s\n' % fnm) + raise IOError self.Data['ftype'] = ftype ## Actually read the file. Parsed = self.Read_Tab[self.Funnel[ftype.lower()]](fnm, **kwargs) @@ -1000,7 +1016,8 @@ def __init__(self, fnm = None, ftype = None, positive_resid=True, build_topology def require(self, *args): for arg in args: if arg not in self.Data: - raise Exception("%s is a required attribute for writing this type of file but it's not present" % arg) + logger.error("%s is a required attribute for writing this type of file but it's not present\n" % arg) + raise RuntimeError # def read(self, fnm, ftype = None): # """ Read in a file. """ @@ -1014,7 +1031,8 @@ def require(self, *args): def write(self,fnm=None,ftype=None,append=False,select=None,**kwargs): if fnm == None and ftype == None: - raise Exception("Output file name and file type are not specified.") + logger.error("Output file name and file type are not specified.\n") + raise RuntimeError elif ftype == None: ftype = os.path.splitext(fnm)[1][1:] ## Fill in comments. 
@@ -1113,7 +1131,8 @@ def rigid_water(self): def load_frames(self, fnm): NewMol = Molecule(fnm) if NewMol.na != self.na: - raise Exception('When loading frames, don\'t change the number of atoms.') + logger.error('When loading frames, don\'t change the number of atoms.\n') + raise RuntimeError for key in NewMol.FrameKeys: self.Data[key] = NewMol.Data[key] @@ -1140,7 +1159,8 @@ def add_quantum(self, other): OtherMol = Molecule(other) for key in OtherMol.QuantumKeys: if key in AtomVariableNames and len(OtherMol.Data[key]) != self.na: - raise Exception('The quantum-key %s is AtomData, but it doesn\'t have the same number of atoms as the Molecule object we\'re adding it to.') + logger.error('The quantum-key %s is AtomData, but it doesn\'t have the same number of atoms as the Molecule object we\'re adding it to.') + raise RuntimeError self.Data[key] = copy.deepcopy(OtherMol.Data[key]) def add_virtual_site(self, idx, **kwargs): @@ -1149,7 +1169,8 @@ def add_virtual_site(self, idx, **kwargs): if key in kwargs: self.Data[key].insert(idx,kwargs[key]) else: - raise Exception('You need to specify %s when adding a virtual site to this molecule.' % key) + logger.error('You need to specify %s when adding a virtual site to this molecule.\n' % key) + raise RuntimeError if 'xyzs' in self.Data: for i, xyz in enumerate(self.xyzs): if 'pos' in kwargs: @@ -1157,7 +1178,8 @@ def add_virtual_site(self, idx, **kwargs): else: self.xyzs[i] = np.insert(xyz, idx, 0.0, axis=0) else: - raise Exception('You need to have xyzs in this molecule to add a virtual site.') + logger.error('You need to have xyzs in this molecule to add a virtual site.\n') + raise RuntimeError def replace_peratom(self, key, orig, want): """ Replace all of the data for a certain attribute in the system from orig to want. """ @@ -1166,7 +1188,8 @@ def replace_peratom(self, key, orig, want): if self.Data[key][i] == orig: self.Data[key][i] = want else: - raise Exception('The key that we want to replace (%s) doesn\'t exist.' 
% key) + logger.error('The key that we want to replace (%s) doesn\'t exist.\n' % key) + raise RuntimeError def replace_peratom_conditional(self, key1, cond, key2, orig, want): """ Replace all of the data for a attribute key2 from orig to want, contingent on key1 being equal to cond. @@ -1176,7 +1199,8 @@ def replace_peratom_conditional(self, key1, cond, key2, orig, want): if self.Data[key2][i] == orig and self.Data[key1][i] == cond: self.Data[key2][i] = want else: - raise Exception('Either the comparison or replacement key (%s, %s) doesn\'t exist.' % (key1, key2)) + logger.error('Either the comparison or replacement key (%s, %s) doesn\'t exist.\n' % (key1, key2)) + raise RuntimeError def atom_select(self,atomslice): """ Return a copy of the object with certain atoms selected. Takes an integer, list or array as argument. """ @@ -1219,7 +1243,8 @@ def atom_select(self,atomslice): def atom_stack(self, other): """ Return a copy of the object with another molecule object appended. WARNING: This function may invalidate stuff like QM energies. """ if len(other) != len(self): - raise Exception('The number of frames of the Molecule objects being stacked are not equal.') + logger.error('The number of frames of the Molecule objects being stacked are not equal.\n') + raise RuntimeError New = Molecule() for key in self.FrameKeys | self.MetaKeys: @@ -1235,7 +1260,8 @@ def FrameStack(k): # Now build the new atom keys. 
for key in self.AtomKeys: if key not in other.Data: - raise Exception('Trying to stack two Molecule objects - the first object contains %s and the other does not' % (key)) + logger.error('Trying to stack two Molecule objects - the first object contains %s and the other does not\n' % (key)) + raise RuntimeError if key == 'tinkersuf': # Tinker suffix is a bit tricky NewSuf = [] for line in other.Data[key]: @@ -1252,7 +1278,8 @@ def FrameStack(k): elif type(self.Data[key]) is list: New.Data[key] = self.Data[key] + other.Data[key] else: - raise Exception('Cannot stack %s because it is of type %s' % (key, str(type(New.Data[key])))) + logger.error('Cannot stack %s because it is of type %s\n' % (key, str(type(New.Data[key])))) + raise RuntimeError if 'bonds' in self.Data and 'bonds' in other.Data: New.Data['bonds'] = self.bonds + [(b[0]+self.na, b[1]+self.na) for b in other.bonds] return New @@ -1285,7 +1312,8 @@ def align(self, smooth = False, center = True, center_mass = False, select=None) if isinstance(select, list): select = np.array(select) if center and center_mass: - raise Exception('Specify center=True or center_mass=True but set the other one to False') + logger.error('Specify center=True or center_mass=True but set the other one to False\n') + raise RuntimeError coms = self.center_of_mass() xyz1 = self.xyzs[0] @@ -1461,7 +1489,8 @@ def find_angles(self): dipeptide when comparing to TINKER's analyze program. """ if not hasattr(self, 'topology'): - raise RuntimeError("Need to have built a topology to find angles") + logger.error("Need to have built a topology to find angles\n") + raise RuntimeError angidx = [] # Iterate over separate molecules @@ -1486,7 +1515,8 @@ def find_dihedrals(self): program. 
""" if not hasattr(self, 'topology'): - raise RuntimeError("Need to have built a topology to find dihedrals") + logger.error("Need to have built a topology to find dihedrals\n") + raise RuntimeError dihidx = [] # Iterate over separate molecules @@ -1806,7 +1836,8 @@ def read_dcd(self, fnm, **kwargs): xyzs = [] boxes = [] if _dcdlib.vmdplugin_init() != 0: - raise IOError("Unable to init DCD plugin") + logger.error("Unable to init DCD plugin\n") + raise IOError natoms = c_int(-1) frame = 0 dcd = _dcdlib.open_dcd_read(fnm, "dcd", byref(natoms)) @@ -2393,7 +2424,8 @@ def read_qcout(self, fnm, errok = [], **kwargs): Answer['qcerr'] = line.strip() fatal = 0 else: - raise Exception('Calculation encountered a fatal error! (%s)' % line) + logger.error('Calculation encountered a fatal error! (%s)\n' % line) + raise RuntimeError if 'Q-Chem fatal error' in line: fatal = 1 if XMode >= 1: @@ -2407,7 +2439,8 @@ def read_qcout(self, fnm, errok = [], **kwargs): if elem == []: elem = elemThis elif elem != elemThis: - raise Exception('Q-Chem output parser will not work if successive calculations have different numbers of atoms!') + logger.error('Q-Chem output parser will not work if successive calculations have different numbers of atoms!\n') + raise RuntimeError elemThis = [] xyzs.append(np.array(xyz)) xyz = [] @@ -2520,7 +2553,7 @@ def read_qcout(self, fnm, errok = [], **kwargs): if len(Mats['hessian_scf']['All']) > 0: Answer['qm_hessians'] = Mats['hessian_scf']['All'] #else: - # raise Exception('There are no forces in %s' % fnm) + # raise RuntimeError('There are no forces in %s' % fnm) # Also work our way down with the energies. 
if len(Floats['energy_ccsdt']) > 0: Answer['qm_energies'] = Floats['energy_ccsdt'] @@ -2530,20 +2563,24 @@ def read_qcout(self, fnm, errok = [], **kwargs): Answer['qm_energies'] = Floats['energy_mp2'] elif len(energy_scf) > 0: if 'correlation' in Answer['qcrems'][0] and Answer['qcrems'][0]['correlation'].lower() in ['mp2', 'rimp2', 'ccsd', 'ccsd(t)']: - raise Exception("Q-Chem was called with a post-HF theory but we only got the SCF energy") + logger.error("Q-Chem was called with a post-HF theory but we only got the SCF energy\n") + raise RuntimeError Answer['qm_energies'] = energy_scf elif 'SCF failed to converge' not in errok: - raise Exception('There are no energies in %s' % fnm) + logger.error('There are no energies in %s\n' % fnm) + raise RuntimeError #### Sanity checks # We currently don't have a graceful way of dealing with SCF convergence failures in the output file. # For instance, a failed calculation will have elem / xyz but no forces. :/ if 0 in conv and 'SCF failed to converge' not in errok: - raise Exception('SCF convergence failure encountered in parsing %s' % fnm) + logger.error('SCF convergence failure encountered in parsing %s\n' % fnm) + raise RuntimeError elif (0 not in conv): # The molecule should have only one charge and one multiplicity if len(set(Floats['charge'])) != 1 or len(set(Floats['mult'])) != 1: - raise Exception('Unexpected number of charges or multiplicities in parsing %s' % fnm) + logger.error('Unexpected number of charges or multiplicities in parsing %s\n' % fnm) + raise RuntimeError # If we have any QM energies (not the case if SCF convergence failure) if 'qm_energies' in Answer: @@ -2558,11 +2595,13 @@ def read_qcout(self, fnm, errok = [], **kwargs): mkspn.append([0.0 for j in mkchg[-1]]) lens = [len(i) for i in Answer['qm_energies'], Answer['xyzs']] if len(set(lens)) != 1: - raise Exception('The number of energies and coordinates in %s are not the same : %s' % (fnm, str(lens))) + logger.error('The number of energies and 
coordinates in %s are not the same : %s\n' % (fnm, str(lens))) + raise RuntimeError # The number of atoms should all be the same if len(set([len(i) for i in Answer['xyzs']])) > 1: - raise Exception('The numbers of atoms across frames in %s are not all the same' % (fnm)) + logger.error('The numbers of atoms across frames in %s are not all the same\n' % (fnm)) + raise RuntimeError if 'qm_forces' in Answer: for i, frc in enumerate(Answer['qm_forces']): @@ -2614,7 +2653,8 @@ def write_qcin(self, select, **kwargs): if 'jobtype' in self.qcrems[remidx] and self.qcrems[remidx]['jobtype'].lower() == 'fsm': fsm = True if len(select) != 2: - raise RuntimeError('For freezing string method, please provide two structures only.') + logger.error('For freezing string method, please provide two structures only.\n') + raise RuntimeError if SectName != '@@@@': out.append('$%s' % SectName) for line in SectData: @@ -2743,7 +2783,8 @@ def write_gro(self, select, **kwargs): def write_dcd(self, select, **kwargs): if _dcdlib.vmdplugin_init() != 0: - raise IOError("Unable to init DCD plugin") + logger.error("Unable to init DCD plugin\n") + raise IOError natoms = c_int(self.na) dcd = _dcdlib.open_dcd_write(self.fout, "dcd", natoms) ts = MolfileTimestep() @@ -2756,7 +2797,8 @@ def write_dcd(self, select, **kwargs): ts.C = self.boxes[I].c if 'boxes' in self.Data else 1.0 result = _dcdlib.write_timestep(dcd, byref(ts)) if result != 0: - raise IOError("Error encountered when writing DCD") + logger.error("Error encountered when writing DCD\n") + raise IOError ## Close the DCD file _dcdlib.close_file_write(dcd) dcd = None @@ -2968,7 +3010,8 @@ def buildbox(line): v3 = np.array([s[7], s[8], s[2]]) return BuildLatticeFromVectors(v1, v2, v3) else: - raise Exception("Not sure what to do since you gave me %i numbers" % len(s)) + logger.error("Not sure what to do since you gave me %i numbers\n" % len(s)) + raise RuntimeError if 'boxes' not in self.Data or len(self.boxes) != self.ns: 
sys.stderr.write("Please specify the periodic box using:\n") @@ -2981,7 +3024,8 @@ def buildbox(line): if os.path.exists(boxstr): boxfile = open(boxstr).readlines() if len(boxfile) != len(self): - raise Exception('Tried to read in the box file, but it has a different length from the number of frames.') + logger.error('Tried to read in the box file, but it has a different length from the number of frames.\n') + raise RuntimeError else: self.boxes = [buildbox(line) for line in boxfile] else: diff --git a/src/moments.py b/src/moments.py index 924d41f31..c249a0ddf 100644 --- a/src/moments.py +++ b/src/moments.py @@ -119,7 +119,8 @@ def read_reference_data(self): self.ref_moments['polarizability']['zz'] = float(s[2]) else: logger.info("%s\n" % line) - raise Exception("This line doesn't comply with our multipole file format!") + logger.error("This line doesn't comply with our multipole file format!\n") + raise RuntimeError ln += 1 # Subtract the trace of the quadrupole moment. if 'quadrupole' in self.ref_moments: diff --git a/src/nifty.py b/src/nifty.py index 68b944e8d..3fcc29d3b 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -124,7 +124,8 @@ def uncommadash(s): logger.warning("List is out of order\n") raise except: - raise Exception('Invalid string for converting to list of numbers: %s' % s) + logger.error('Invalid string for converting to list of numbers: %s\n' % s) + raise RuntimeError return L def extract_int(arr, avgthre, limthre, label="value", verbose=True): @@ -477,7 +478,8 @@ def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True): N = A_n.shape[0] # Be sure A_n and B_n have the same dimensions. if(A_n.shape != B_n.shape): - raise ParameterError('A_n and B_n must have same dimensions.') + logger.error('A_n and B_n must have same dimensions.\n') + raise ParameterError # Initialize statistical inefficiency estimate with uncorrelated value. g = 1.0 # Compute mean of each timeseries. 
@@ -574,11 +576,13 @@ def load_etree(self): for q in "\"'": # double or single quote if rep.startswith(q): if not rep.endswith(q): - raise ValueError, "insecure string pickle" + logger.error("insecure string pickle\n") + raise ValueError rep = rep[len(q):-len(q)] break else: - raise ValueError, "insecure string pickle" + logger.error("insecure string pickle\n") + raise ValueError ## The string is converted to an _ElementTree type before it is finally loaded. self.append(etree.ElementTree(etree.fromstring(rep.decode("string-escape")))) except: @@ -817,7 +821,9 @@ def onefile(ext, arg=None): def GoInto(Dir): if os.path.exists(Dir): if os.path.isdir(Dir): pass - else: raise Exception("Tried to create directory %s, it exists but isn't a directory" % newdir) + else: + logger.error("Tried to create directory %s, it exists but isn't a directory\n" % newdir) + raise RuntimeError else: os.makedirs(Dir) os.chdir(Dir) @@ -830,7 +836,8 @@ def allsplit(Dir): def Leave(Dir): if os.path.split(os.getcwd())[1] != Dir: - raise Exception("Trying to leave directory %s, but we're actually in directory %s (check your code)" % (Dir,os.path.split(os.getcwd())[1])) + logger.error("Trying to leave directory %s, but we're actually in directory %s (check your code)\n" % (Dir,os.path.split(os.getcwd())[1])) + raise RuntimeError for i in range(len(allsplit(Dir))): os.chdir('..') @@ -871,23 +878,28 @@ def LinkFile(src, dest, nosrcok = False): if os.path.exists(src): if os.path.exists(dest): if os.path.islink(dest): pass - else: raise Exception("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link" % (src, dest)) + else: + logger.error("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link\n" % (src, dest)) + raise RuntimeError else: os.symlink(src, dest) else: if not nosrcok: - raise Exception("Tried to create symbolic link %s to %s, but source file doesn't exist%s" % (src,dest,MissingFileInspection(src))) + logger.error("Tried to 
create symbolic link %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src))) + raise RuntimeError def CopyFile(src, dest): if os.path.exists(src): if os.path.exists(dest): if os.path.islink(dest): - raise Exception("Tried to copy %s to %s, destination exists but it's a symbolic link" % (src, dest)) + logger.error("Tried to copy %s to %s, destination exists but it's a symbolic link\n" % (src, dest)) + raise RuntimeError else: shutil.copy2(src, dest) else: - raise Exception("Tried to copy %s to %s, but source file doesn't exist%s" % (src,dest,MissingFileInspection(src))) + logger.error("Tried to copy %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src))) + raise RuntimeError def link_dir_contents(abssrcdir, absdestdir): for fnm in os.listdir(abssrcdir): @@ -1050,7 +1062,8 @@ def process_err(read): # This code (commented out) would not throw an exception, but instead exit with the returncode of the crashed program. # sys.stderr.write("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode)) # sys.exit(p.returncode) - raise Exception("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode)) + logger.error("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n\n" % (command, p.returncode)) + raise RuntimeError # Return the output in the form of a list of lines, so we can loop over it using "for line in output". Out = process_out.stdout.split('\n') diff --git a/src/objective.py b/src/objective.py index e68373734..dd04b9242 100644 --- a/src/objective.py +++ b/src/objective.py @@ -129,13 +129,15 @@ def __init__(self, options, tgt_opts, forcefield): self.Targets = [] for opts in tgt_opts: if opts['type'] not in Implemented_Targets: - raise RuntimeError('The target type \x1b[1;91m%s\x1b[0m is not implemented!' 
% opts['type']) + logger.error('The target type \x1b[1;91m%s\x1b[0m is not implemented!\n' % opts['type']) + raise RuntimeError if opts["remote"]: Tgt = forcebalance.target.RemoteTarget(options, opts, forcefield) else: Tgt = Implemented_Targets[opts['type']](options,opts,forcefield) self.Targets.append(Tgt) printcool_dictionary(Tgt.PrintOptionDict,"Setup for target %s :" % Tgt.name) if len(set([Tgt.name for Tgt in self.Targets])) != len([Tgt.name for Tgt in self.Targets]): - raise Exception("The list of target names is not unique!") + logger.error("The list of target names is not unique!\n") + raise RuntimeError ## The force field (it seems to be everywhere) self.FF = forcefield ## Initialize the penalty function. diff --git a/src/openmmio.py b/src/openmmio.py index d9937d3ec..2d501f145 100644 --- a/src/openmmio.py +++ b/src/openmmio.py @@ -478,7 +478,8 @@ def setopts(self, platname="CUDA", precision="single", **kwargs): self.precision = self.precision.lower() valprecs = ['single','mixed','double'] if self.precision not in valprecs: - raise RuntimeError("Please specify one of %s for precision" % valprecs) + logger.error("Please specify one of %s for precision\n" % valprecs) + raise RuntimeError ## Set the simulation platform if self.verbose: logger.info("Setting Platform to %s\n" % self.platname) self.platform = Platform.getPlatformByName(self.platname) @@ -508,14 +509,17 @@ def readsrc(self, **kwargs): if 'pdb' in kwargs and os.path.exists(kwargs['pdb']): # Case 1. 
The PDB file name is provided explicitly pdbfnm = kwargs['pdb'] - if not os.path.exists(pdbfnm): raise RuntimeError("%s specified but doesn't exist" % pdbfnm) + if not os.path.exists(pdbfnm): + logger.error("%s specified but doesn't exist\n" % pdbfnm) + raise RuntimeError if 'mol' in kwargs: self.mol = kwargs['mol'] elif 'coords' in kwargs: self.mol = Molecule(kwargs['coords']) else: - raise RuntimeError('Must provide either a molecule object or coordinate file.') + logger.error('Must provide either a molecule object or coordinate file.\n') + raise RuntimeError if pdbfnm != None: mpdb = Molecule(pdbfnm) @@ -543,7 +547,8 @@ def prepare(self, pbc=False, mmopts={}, **kwargs): else: if 'ffxml' in kwargs: if not os.path.exists(kwargs['ffxml']): - raise RuntimeError("%s doesn't exist" % kwargs['ffxml']) + logger.error("%s doesn't exist\n" % kwargs['ffxml']) + raise RuntimeError self.ffxml = kwargs['ffxml'] elif onefile('xml'): self.ffxml = onefile('xml') @@ -559,7 +564,8 @@ def prepare(self, pbc=False, mmopts={}, **kwargs): if hasattr(self,'FF'): if self.AMOEBA: if self.FF.amoeba_pol == None: - raise RuntimeError('You must specify amoeba_pol if there are any AMOEBA forces.') + logger.error('You must specify amoeba_pol if there are any AMOEBA forces.\n') + raise RuntimeError if self.FF.amoeba_pol == 'mutual': self.mmopts['polarization'] = 'mutual' self.mmopts.setdefault('mutualInducedTargetEpsilon', self.FF.amoeba_eps if self.FF.amoeba_eps != None else 1e-6) @@ -597,7 +603,8 @@ def prepare(self, pbc=False, mmopts={}, **kwargs): if self.pbc: # Obtain the periodic box if self.mol.boxes[I].alpha != 90.0 or self.mol.boxes[I].beta != 90.0 or self.mol.boxes[I].gamma != 90.0: - raise RuntimeError('OpenMM cannot handle nonorthogonal boxes.') + logger.error('OpenMM cannot handle nonorthogonal boxes.\n') + raise RuntimeError box_omm = [Vec3(self.mol.boxes[I].a, 0, 0)*angstrom, Vec3(0, self.mol.boxes[I].b, 0)*angstrom, Vec3(0, 0, self.mol.boxes[I].c)*angstrom] @@ -636,7 +643,8 @@ 
def create_simulation(self, timestep=1.0, faststep=0.25, temperature=None, press ## If temperature control is turned on, then run Langevin dynamics. if mts: if rpmd_beads > 0: - raise RuntimeError("No multiple timestep integrator without temperature control.") + logger.error("No multiple timestep integrator without temperature control.\n") + raise RuntimeError integrator = MTSVVVRIntegrator(temperature*kelvin, collision/picosecond, timestep*femtosecond, self.system, ninnersteps=int(timestep/faststep)) else: @@ -649,7 +657,8 @@ def create_simulation(self, timestep=1.0, faststep=0.25, temperature=None, press else: ## If no temperature control, default to the Verlet integrator. if rpmd_beads > 0: - raise RuntimeError("No RPMD integrator without temperature control.") + logger.error("No RPMD integrator without temperature control.\n") + raise RuntimeError if mts: warn_once("No multiple timestep integrator without temperature control.") integrator = VerletIntegrator(timestep*femtoseconds) @@ -827,7 +836,8 @@ def energy_dipole(self): return np.hstack((Result["Energy"].reshape(-1,1), Result["Dipole"])) def normal_modes(self, shot=0, optimize=True): - raise NotImplementedError("OpenMM cannot do normal mode analysis") + logger.error("OpenMM cannot do normal mode analysis\n") + raise NotImplementedError def optimize(self, shot=0, crit=1e-4): @@ -865,7 +875,8 @@ def multipole_moments(self, shot=0, optimize=True, polarizability=False): self.update_simulation() if polarizability: - raise NotImplementedError("Polarizability calculation is available in TINKER only.") + logger.error("Polarizability calculation is available in TINKER only.\n") + raise NotImplementedError if optimize: self.optimize(shot) else: self.set_positions(shot) @@ -906,7 +917,8 @@ def interaction_energy(self, fraga, fragb): self.update_simulation() if self.name == 'A' or self.name == 'B': - raise RuntimeError("Don't name the engine A or B!") + logger.error("Don't name the engine A or B!\n") + raise 
RuntimeError # Create two subengines. if hasattr(self,'target'): @@ -953,11 +965,13 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, """ if float(int(float(nequil)/float(nsave))) != float(nequil)/float(nsave): - raise RuntimeError("Please set nequil to an integer multiple of nsave") + logger.error("Please set nequil to an integer multiple of nsave\n") + raise RuntimeError iequil = nequil/nsave if float(int(float(nsteps)/float(nsave))) != float(nsteps)/float(nsave): - raise RuntimeError("Please set nsteps to an integer multiple of nsave") + logger.error("Please set nsteps to an integer multiple of nsave\n") + raise RuntimeError isteps = nsteps/nsave if hasattr(self, 'simulation'): diff --git a/src/optimizer.py b/src/optimizer.py index 2b0c4535f..caf2bdd7d 100644 --- a/src/optimizer.py +++ b/src/optimizer.py @@ -269,7 +269,9 @@ def save_mvals_to_input(self, mvals): if in_mvals: continue print >> fout, line, if line1.startswith("read_mvals"): - if have_mvals: raise RuntimeError("Encountered more than one read_mvals section") + if have_mvals: + logger.error("Encountered more than one read_mvals section\n") + raise RuntimeError have_mvals = 1 in_mvals = 1 print >> fout, self.FF.sprint_map(mvals, precision=8) diff --git a/src/output.py b/src/output.py index 7eb352274..e42e38cd8 100644 --- a/src/output.py +++ b/src/output.py @@ -76,6 +76,7 @@ def emit(self, record): class ModLogger(Logger): def error(self, msg, *args, **kwargs): + msg = '\n'.join(['\x1b[91m%s\x1b[0m' % s for s in msg.split('\n') if len(s.strip()) > 0])+'\n' for hdlr in (self.parent.handlers if self.propagate else self.handlers): hdlr.savestream = hdlr.stream hdlr.stream = sys.stderr diff --git a/src/parser.py b/src/parser.py index a1df3c4e1..40b90a8df 100644 --- a/src/parser.py +++ b/src/parser.py @@ -252,7 +252,8 @@ if i in dct: iocc.append("gen_opt_types %s" % typ) if len(iocc) != 1: - raise RuntimeError("CODING ERROR: ForceBalance option %s occurs in more than one 
place (%s)" % (i, str(iocc))) + logger.error("CODING ERROR: ForceBalance option %s occurs in more than one place (%s)\n" % (i, str(iocc))) + raise RuntimeError ## Default general options - basically a collapsed veresion of gen_opts_types. gen_opts_defaults = {} @@ -499,7 +500,8 @@ def parse_inputs(input_file=None): elif isfloat(s[1]) and int(float(s[1])) == 1: this_opt[key] = True else: - raise RuntimeError('%s is a true/false option but you provided %s; to enable, provide ["1", "yes", "true", "on" or ]. To disable, provide ["0", "no", "false", or "off"].' % (key, s[1])) + logger.error('%s is a true/false option but you provided %s; to enable, provide ["1", "yes", "true", "on" or ]. To disable, provide ["0", "no", "false", or "off"].\n' % (key, s[1])) + raise RuntimeError elif key in opts_types['floats']: this_opt[key] = float(s[1]) elif key in opts_types['sections']: diff --git a/src/quantity.py b/src/quantity.py index 677844be7..44b137068 100644 --- a/src/quantity.py +++ b/src/quantity.py @@ -120,8 +120,9 @@ def extract(self, engines, FF, mvals, h, AGrad=True): being fitted. 
""" - raise NotImplementedError(("Extract method not implemented" - " in base class.")) + logger.error("Extract method not implemented in base class.\n") + raise NotImplementedError + # class Quantity_Density class Quantity_Density(Quantity): def __init__(self, engname, temperature, pressure, name=None): diff --git a/src/target.py b/src/target.py index 0d4343c91..cb79b6006 100644 --- a/src/target.py +++ b/src/target.py @@ -143,7 +143,8 @@ def __init__(self,options,tgt_opts,forcefield): _exec("tar xvzf targets.tar.gz") tgtdir = 'targets' else: - raise Exception('\x1b[91mThe targets directory is missing!\x1b[0m\nDid you finish setting up the target data?\nPlace the data in a directory called "targets" or "simulations"') + logger.error('\x1b[91mThe targets directory is missing!\x1b[0m\nDid you finish setting up the target data?\nPlace the data in a directory called "targets" or "simulations"\n') + raise RuntimeError self.set_option(None, None, 'tgtdir', os.path.join(tgtdir,self.name)) ## Temporary (working) directory; it is temp/(target_name) ## Used for storing temporary variables that don't change through the course of the optimization @@ -360,7 +361,8 @@ def get(self,mvals,AGrad=False,AHess=False): """ - raise NotImplementedError('The get method is not implemented in the Target base class') + logger.error('The get method is not implemented in the Target base class\n') + raise NotImplementedError def check_files(self, there): @@ -394,9 +396,11 @@ def absrd(self, inum=None): """ if Counter() > First(): - raise RuntimeError("Iteration number of this run must be %s to read data from disk (it is %s)" % (First(), Counter())) + logger.error("Iteration number of this run must be %s to read data from disk (it is %s)\n" % (First(), Counter())) + raise RuntimeError if self.rd == None: - raise RuntimeError("The directory for reading is not set") + logger.error("The directory for reading is not set\n") + raise RuntimeError # Current directory. 
Move back into here after reading data. here = os.getcwd() @@ -407,7 +411,8 @@ def absrd(self, inum=None): abs_rd = os.path.join(self.root, self.rd) # Check for directory existence. if not os.path.exists(abs_rd): - raise RuntimeError("Provided path %s does not exist" % self.rd) + logger.error("Provided path %s does not exist\n" % self.rd) + raise RuntimeError # Figure out which directory to go into. s = os.path.split(self.rd) have_data = 0 @@ -415,7 +420,8 @@ def absrd(self, inum=None): # Case 1: User has provided a specific directory to read from. there = abs_rd if not self.check_files(there): - raise RuntimeError("Provided path %s does not contain remote target output" % self.rd) + logger.error("Provided path %s does not contain remote target output\n" % self.rd) + raise RuntimeError have_data = 1 elif s[-1] == self.name: # Case 2: User has provided the target name. @@ -428,7 +434,8 @@ def absrd(self, inum=None): else: # Case 3: User has provided something else (must contain the target name in the next directory down.) 
if not os.path.exists(os.path.join(abs_rd, self.name)): - raise RuntimeError("Target directory %s does not exist in %s" % (self.name, self.rd)) + logger.error("Target directory %s does not exist in %s\n" % (self.name, self.rd)) + raise RuntimeError iterints = [int(d.replace('iter_','')) for d in os.listdir(os.path.join(abs_rd, self.name)) if os.path.isdir(os.path.join(abs_rd, self.name, d))] for i in sorted(iterints)[::-1]: there = os.path.join(abs_rd, self.name, 'iter_%04i' % i) @@ -436,7 +443,8 @@ def absrd(self, inum=None): have_data = 1 break if not have_data: - raise RuntimeError("Did not find data to read in %s" % self.rd) + logger.error("Did not find data to read in %s\n" % self.rd) + raise RuntimeError if inum != None: there = os.path.join(os.path.split(there)[0],'iter_%04i' % inum) @@ -481,7 +489,8 @@ def meta_indicate(self): os.chdir(os.path.join(self.root, self.rundir)) # If indicate.log already exists then we've made some kind of mistake. if os.path.exists('indicate.log'): - raise RuntimeError('indicate.log should not exist yet in this directory: %s' % os.getcwd()) + logger.error('indicate.log should not exist yet in this directory: %s\n' % os.getcwd()) + raise RuntimeError # Add a handler for printing to screen and file logger = getLogger("forcebalance") hdlr = forcebalance.output.RawFileHandler('indicate.log') @@ -515,7 +524,8 @@ def meta_get(self, mvals, AGrad=False, AHess=False, customdir=None): if Counter() is not None: # Not expecting more than ten thousand iterations if Counter() > 10000: - raise RuntimeError('Cannot handle more than 10000 iterations due to current directory structure. Consider revising code.') + logger.error('Cannot handle more than 10000 iterations due to current directory structure. 
Consider revising code.\n') + raise RuntimeError iterdir = "iter_%04i" % Counter() absgetdir = os.path.join(absgetdir,iterdir) if customdir is not None: @@ -638,7 +648,8 @@ def printcool_table(self, data=OrderedDict([]), headings=[], banner=None, footno # Sanity check. for val in data.values(): if (len(val)+1) != nc: - raise RuntimeError('There are %i column headings, so the values in the data dictionary must be lists of length %i (currently %i)' % (nc, nc-1, len(val))) + logger.error('There are %i column headings, so the values in the data dictionary must be lists of length %i (currently %i)\n' % (nc, nc-1, len(val))) + raise RuntimeError cwidths = [0 for i in range(nc)] # Figure out maximum column width. # First look at all of the column headings... @@ -684,7 +695,8 @@ def __init__(self,options,tgt_opts,forcefield): self.remote_indicate = "" if options['wq_port'] == 0: - raise RuntimeError("Please set the Work Queue port to use Remote Targets.") + logger.error("Please set the Work Queue port to use Remote Targets.\n") + raise RuntimeError # Remote target will read objective.p and indicate.log at the same time, # and it uses a different mechanism because it does this at every iteration (not just the 0th). diff --git a/src/thermo.py b/src/thermo.py index 351f9957f..c2f6b3a2c 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -255,7 +255,8 @@ def sanity_check(self): self.format = "right-aligned fixed width text" else: # Sanity check - it should never get here unless the parser is incorrect. 
- raise RuntimeError("Fixed-width format detected but columns are neither left-aligned nor right-aligned!") + logger.error("Fixed-width format detected but columns are neither left-aligned nor right-aligned!\n") + raise RuntimeError def parse1(fnm): @@ -328,12 +329,12 @@ def __init__(self, options, tgt_opts, forcefield): LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - def read_source(self, source): + def read_source(self, srcfnm): """Read and store source data. Parameters ---------- - source : string + srcfnm : string Read source data from this filename. Returns @@ -342,40 +343,91 @@ def read_source(self, source): """ - parser = parse1(source) - print parser.headings - printcool_dictionary(parser.metadata, title="Metadata") - # print parser.table - revised_headings = [] + source = parse1(srcfnm) + printcool_dictionary(source.metadata, title="Metadata") + # print source.table + revhead = [] obs = '' def error_left(i): - logger.error('Encountered heading %s but there is no observable to the left\n' % i) + logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % i) raise RuntimeError - for head in parser.headings: + def standardize_heading(obs, head, abbrevs, standard_abbrev): + if head in abbrevs: + if obs == '': error_left(head) + return obs + '_' + standard_abbrev, False + elif len(head.split('_')) > 1 and head.split('_')[-1] in abbrevs: + newhl = head.split('_') + newhl[-1] = standard_abbrev + return '_'.join(newhl), False + else: + return head, True + + units = defaultdict(str) + + for i, head in enumerate(source.headings): + head = head.lower() + if i == 0 and head == 'index': # Treat special case because index can also mean other things + revhead.append(head) + continue usplit = re.split(' *\(', head, maxsplit=1) + punit = '' if len(usplit) > 1: hfirst = usplit[0] punit = re.sub('\)$','',usplit[1].strip()) print "header", head, "split into", hfirst, ",", punit else: 
hfirst = head - punit = '' newh = hfirst - if head.lower() in ['w', 'wt', 'wts']: - if obs == '': error_left(head) - newh = obs + '_' + hfirst - elif head.lower() in ['s', 'sig', 'sigma']: - if obs == '': error_left(head) - newh = obs + '_' + hfirst - elif head.lower() in ['idx']: - if obs == '': error_left(head) - newh = obs + '_' + hfirst - else: + newh, o1 = standardize_heading(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'weight') + newh, o2 = standardize_heading(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sigma') + newh, o3 = standardize_heading(obs, newh, ['i', 'idx', 'index', 'indices'], 'index') + if newh in ['t', 'temp', 'temperature']: newh = 'temperature' + if newh in ['p', 'pres', 'pressure']: newh = 'pressure' + if all([o1, o2, o3]): obs = hfirst if newh != hfirst: - print "header", head, "renamed to", newh + print "header", hfirst, "renamed to", newh + revhead.append(newh) + if punit != '': + units[newh] = punit + + if len(set(revhead)) != len(revhead): + logger.error('Column headings : ' + str(revhead) + '\n') + logger.error('\x1b[91mColumn headings are not unique!\x1b[0m\n') + raise RuntimeError + + print revhead + if revhead[0] != 'index': + logger.error('\x1b[91mIndex column heading is not present\x1b[0m\n(Add an Index column on the left!)\n') + raise RuntimeError + + uqidx = [] + saveidx = '' + index = [] + # thisidx = Index that is built from the current row (may be empty) + # saveidx = Index that may have been saved from a previous row + for row in source.table: + thisidx = row[0] + if thisidx != '': + saveidx = thisidx + if saveidx in uqidx: + logger.error('Index %s is duplicated in data table\n' % i) + raise RuntimeError + uqidx.append(saveidx) + index.append(saveidx) + if saveidx == '': + logger.error('Row of data : ' + str(row) + '\n') + logger.error('\x1b[91mThis row does not have an index!\x1b[0m\n') + raise RuntimeError + + self.Data = pd.DataFrame([]) + + # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in 
revised_headings if row[i] != '']) for row in source.table]) + + # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_headings)])) + # print self.Data.__repr__ raw_input() return diff --git a/src/tinkerio.py b/src/tinkerio.py index 1dcd5f485..04111ddef 100644 --- a/src/tinkerio.py +++ b/src/tinkerio.py @@ -176,7 +176,8 @@ def write_key(fout, options, fin=None, defaults={}, verbose=False, prmfnm=None, # Make sure that the keys are lowercase, and the values are all strings. options = OrderedDict([(key.lower(), str(val) if val != None else None) for key, val in options.items()]) if 'parameters' in options and prmfnm != None: - raise RuntimeError("Please pass prmfnm or 'parameters':'filename.prm' in options but not both.") + logger.error("Please pass prmfnm or 'parameters':'filename.prm' in options but not both.\n") + raise RuntimeError elif 'parameters' in options: prmfnm = options['parameters'] @@ -249,7 +250,8 @@ def write_key(fout, options, fin=None, defaults={}, verbose=False, prmfnm=None, val = options[key] val0 = valf.strip() if key in clashes and val != val0: - raise RuntimeError("write_key tried to set %s = %s but its original value was %s = %s" % (key, val, key, val0)) + logger.error("write_key tried to set %s = %s but its original value was %s = %s\n" % (key, val, key, val0)) + raise RuntimeError # Passing None as the value causes the option to be deleted if val == None: continue @@ -283,10 +285,12 @@ def write_key(fout, options, fin=None, defaults={}, verbose=False, prmfnm=None, options["parameters"] = prmfnm elif not prmflag: if not os.path.exists('%s.prm' % os.path.splitext(fout)[0]): - raise RuntimeError('No parameter file detected, this will cause TINKER to crash') + logger.error('No parameter file detected, this will cause TINKER to crash\n') + raise RuntimeError for i in chk: if i not in haveopts: - raise RuntimeError('%s is expected to be in the .key file, but not found' % i) + logger.error('%s is 
expected to be in the .key file, but not found\n' % i) + raise RuntimeError # Finally write the key file. file_out = wopen(fout) for line in out: @@ -336,7 +340,9 @@ def readsrc(self, **kwargs): self.mol = Molecule(kwargs['coords']) else: arcfile = onefile('arc') - if not arcfile: raise RuntimeError('Cannot determine which .arc file to use') + if not arcfile: + logger.error('Cannot determine which .arc file to use\n') + raise RuntimeError self.mol = Molecule(arcfile) def calltinker(self, command, stdin=None, print_to_screen=False, print_command=False, **kwargs): @@ -365,14 +371,16 @@ def calltinker(self, command, stdin=None, print_to_screen=False, print_command=F warn_press_key("ForceBalance requires TINKER %.1f - unexpected behavior with older versions!" % vn_need) self.warn_vn = True except: - raise RuntimeError("Unable to determine TINKER version number!") + logger.error("Unable to determine TINKER version number!\n") + raise RuntimeError for line in o[-10:]: # Catch exceptions since TINKER does not have exit status. if "TINKER is Unable to Continue" in line: for l in o: logger.error("%s\n" % l) time.sleep(1) - raise RuntimeError("TINKER may have crashed! (See above output)\nThe command was: %s\nThe directory was: %s" % (' '.join(csplit), os.getcwd())) + logger.error("TINKER may have crashed! 
(See above output)\nThe command was: %s\nThe directory was: %s\n" % (' '.join(csplit), os.getcwd())) + raise RuntimeError break for line in o: if 'D+' in line: @@ -445,7 +453,8 @@ def prepare(self, pbc=False, **kwargs): tk_opts['gamma'] = None if pbc: if (not keypbc) and 'boxes' not in self.mol.Data: - raise RuntimeError("Periodic boundary conditions require either (1) a-axis to be in the .key file or (b) boxes to be in the coordinate file.") + logger.error("Periodic boundary conditions require either (1) a-axis to be in the .key file or (b) boxes to be in the coordinate file.\n") + raise RuntimeError self.pbc = pbc if pbc: tk_opts['ewald'] = '' @@ -802,7 +811,8 @@ def energy_rmsd(self, shot=0, optimize=True): if "Total Potential Energy" in line: E = float(line.split()[-2].replace('D','e')) if E == None: - raise RuntimeError("Total potential energy wasn't encountered when calling analyze!") + logger.error("Total potential energy wasn't encountered when calling analyze!\n") + raise RuntimeError if optimize and abs(E-E_) > 0.1: warn_press_key("Energy from optimize and analyze aren't the same (%.3f vs. %.3f)" % (E, E_)) return E, rmsd @@ -1018,7 +1028,8 @@ def __init__(self,options,tgt_opts,forcefield): # Error checking. for i in self.nptfiles: if not os.path.exists(os.path.join(self.root, self.tgtdir, i)): - raise RuntimeError('Please provide %s; it is needed to proceed.' % i) + logger.error('Please provide %s; it is needed to proceed.\n' % i) + raise RuntimeError # Send back the trajectory file. self.extra_output = ['liquid.dyn'] if self.save_traj > 0: diff --git a/src/vibration.py b/src/vibration.py index 132beedda..3b8127d40 100644 --- a/src/vibration.py +++ b/src/vibration.py @@ -59,7 +59,8 @@ def __init__(self,options,tgt_opts,forcefield): ## Create engine object. 
self.engine = self.engine_(target=self, **engine_args) if self.FF.rigid_water: - raise Exception('This class cannot be used with rigid water molecules.') + logger.error('This class cannot be used with rigid water molecules.\n') + raise RuntimeError def read_reference_data(self): """ Read the reference vibrational data from a file. """ @@ -93,7 +94,8 @@ def read_reference_data(self): pass else: logger.info(line + '\n') - raise Exception("This line doesn't comply with our vibration file format!") + logger.error("This line doesn't comply with our vibration file format!\n") + raise RuntimeError ln += 1 self.ref_eigvals = np.array(self.ref_eigvals) self.ref_eigvecs = np.array(self.ref_eigvecs) @@ -114,7 +116,8 @@ def vibration_driver(self): if hasattr(self, 'engine') and hasattr(self.engine, 'normal_modes'): return self.engine.normal_modes() else: - raise NotImplementedError('Normal mode calculation not supported, try using a different engine') + logger.error('Normal mode calculation not supported, try using a different engine\n') + raise NotImplementedError def process_vectors(self, vecs, verbose=False, check=False): diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt index 3b653f242..0bfd3a7dc 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt @@ -1,7 +1,7 @@ # Experimental data for liquid bromine. 
- Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w - 298.15 1.01325 3102.8 1.0 29.96 1.0 + Index Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w + 0 298.15 1.01325 3102.8 1.0 29.96 1.0 # Variables: Denominators and weights for quantities Denoms = 30 0.3 diff --git a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo_liquid_bromine/test_parse.in index 6e1a704df..28c854b1d 100644 --- a/studies/004_thermo_liquid_bromine/test_parse.in +++ b/studies/004_thermo_liquid_bromine/test_parse.in @@ -70,7 +70,7 @@ $target name LiquidBromine type Thermo_GMX weight 1.0 -expdata_txt expset.txt +source expset.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -81,7 +81,7 @@ $target name LiquidBromine_CSV type Thermo_GMX weight 1.0 -expdata_txt data.csv +source data.csv quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -92,7 +92,7 @@ $target name LiquidBromine_TAB type Thermo_GMX weight 1.0 -expdata_txt data.tab.txt +source data.tab.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -103,7 +103,7 @@ $target name Lipid_SPC type Thermo_GMX weight 1.0 -expdata_txt lipidcol1.txt +source lipidcol1.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -114,7 +114,7 @@ $target name Lipid_RIT type Thermo_GMX weight 1.0 -expdata_txt lipidcol1.txt +source lipidcol1.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -125,7 +125,7 @@ $target name Lipid_TAB type Thermo_GMX weight 1.0 -expdata_txt lipidcol1.txt +source lipidcol1.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -136,7 +136,7 @@ $target name Lipid_MUL type Thermo_GMX weight 1.0 -expdata_txt lipidcol2a.txt +source lipidcol2a.txt quantities density h_vap n_sim_chain 2 md_steps 100000 From 0baea9b16f0b658315fe6bf66642e29480b378fe Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 6 Apr 2014 20:48:30 -0700 Subject: [PATCH 04/25] Added file referencing in parser and build DataFrame --- src/output.py | 2 + src/thermo.py | 187 
+++++++++++++----- .../targets/Lipid_HARD/lipidcol1.txt | 67 +++++++ .../targets/Lipid_HARD/scd323.txt | 17 ++ .../targets/Lipid_HARD/scd333.txt | 17 ++ .../targets/Lipid_HARD/scd338.txt | 17 ++ .../targets/Lipid_HARD/scd353.txt | 17 ++ .../targets/Lipid_MUL/lipidcol2a.txt | 10 +- .../targets/Lipid_RIT/lipidcol1.txt | 131 ++++++------ .../targets/Lipid_SPC/lipidcol1.txt | 130 ++++++------ .../targets/Lipid_TAB/lipidcol1.txt | 130 ++++++------ .../targets/LiquidBromine_CSV/data.csv | 16 +- .../targets/LiquidBromine_TAB/data.tab.txt | 4 +- .../004_thermo_liquid_bromine/test_parse.in | 11 ++ 14 files changed, 498 insertions(+), 258 deletions(-) create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt diff --git a/src/output.py b/src/output.py index e42e38cd8..eb760eab3 100644 --- a/src/output.py +++ b/src/output.py @@ -12,6 +12,7 @@ def __init__(self, name): self.defaultHandler = RawStreamHandler(sys.stdout) super(ForceBalanceLogger, self).addHandler(self.defaultHandler) self.setLevel(INFO) + self.propagate = False def addHandler(self, hdlr): if self.defaultHandler: @@ -86,3 +87,4 @@ def error(self, msg, *args, **kwargs): # module level loggers should use the default logger object setLoggerClass(ModLogger) + diff --git a/src/thermo.py b/src/thermo.py index c2f6b3a2c..d2eb6ee0d 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -9,15 +9,18 @@ from forcebalance.target import Target from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row +from forcebalance.nifty import flat, col, row, isint from forcebalance.nifty import lp_dump, lp_load, wopen, _exec from forcebalance.nifty 
import LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary from collections import defaultdict, OrderedDict -from forcebalance.output import getLogger +import forcebalance +from forcebalance.output import * logger = getLogger(__name__) +# print logger.parent.parent.handlers[0] +# logger.parent.parent.handlers = [] class TextParser(object): """ Parse a text file. """ @@ -33,14 +36,14 @@ def is_comment_line(self): def process_header(self): """ Function for setting more attributes using the header line, if needed. """ - self.headings = [i.strip() for i in self.fields[:]] + self.heading = [i.strip() for i in self.fields[:]] def process_data(self): """ Function for setting more attributes using the current line, if needed. """ trow = [] - for ifld in range(len(self.headings)): + for ifld in range(len(self.heading)): if ifld < len(self.fields): - trow.append(self.fields[ifld]) + trow.append(self.fields[ifld].strip()) else: trow.append('') return trow @@ -49,7 +52,7 @@ def sanity_check(self): """ Extra sanity checks. """ def parse(self): - self.headings = [] # Fields in header line + self.heading = [] # Fields in header line meta = defaultdict(list) # Dictionary of metadata found_header = 0 # Whether we found the header line table = [] # List of data records @@ -240,7 +243,7 @@ def process_data(self): if set(fend).issubset(hend): for hpos in hend: if hpos in fend: - trow.append(fields[fend.index(hpos)]) + trow.append(fields[fend.index(hpos)].strip()) else: trow.append('') # Field start / end positions for the line of data @@ -289,6 +292,59 @@ def parse1(fnm): return FIX_Parser(fnm) return +def fix_suffix(obs, head, suffixs, standard_suffix): + + """ Standardize the suffix in a column heading. 
""" + + if head in suffixs: + if obs == '': + logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % head) + raise RuntimeError + return obs + '_' + standard_suffix, False + elif len(head.split('_')) > 1 and head.split('_')[-1] in suffixs: + newhl = head.split('_') + newhl[-1] = standard_suffix + return '_'.join(newhl), False + else: + return head, True + +def stand_head(head, obs): + + """ + Standardize a column heading. Does the following: + + 1) Make lowercase + 2) Split off the physical unit + 3) If a weight, uncertainty or atom index, prepend the observable name + 4) Shorten temperature and pressure + 5) Determine if this is a new observable + + Parameters: + head = Name of the heading + obs = Name of the observable (e.g. from a previously read field) + """ + + head = head.lower() + usplit = re.split(' *\(', head, maxsplit=1) + punit = '' + if len(usplit) > 1: + hfirst = usplit[0] + punit = re.sub('\)$','',usplit[1].strip()) + print "header", head, "split into", hfirst, ",", punit + else: + hfirst = head + newh = hfirst + newh, o1 = fix_suffix(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'wt') + newh, o2 = fix_suffix(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sig') + newh, o3 = fix_suffix(obs, newh, ['i', 'idx', 'index', 'indices'], 'idx') + if newh in ['t', 'temp', 'temperature']: newh = 'temp' + if newh in ['p', 'pres', 'pressure']: newh = 'pres' + if all([o1, o2, o3]): + obs = newh + if newh != hfirst: + print "header", hfirst, "renamed to", newh + return newh, punit, obs + class Thermo(Target): """ A target for fitting general experimental data sets. 
The @@ -343,54 +399,25 @@ def read_source(self, srcfnm): """ + logger.info('Parsing source file %s\n' % srcfnm) source = parse1(srcfnm) printcool_dictionary(source.metadata, title="Metadata") # print source.table revhead = [] obs = '' - def error_left(i): - logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % i) - raise RuntimeError - def standardize_heading(obs, head, abbrevs, standard_abbrev): - if head in abbrevs: - if obs == '': error_left(head) - return obs + '_' + standard_abbrev, False - elif len(head.split('_')) > 1 and head.split('_')[-1] in abbrevs: - newhl = head.split('_') - newhl[-1] = standard_abbrev - return '_'.join(newhl), False - else: - return head, True units = defaultdict(str) - for i, head in enumerate(source.headings): - head = head.lower() - if i == 0 and head == 'index': # Treat special case because index can also mean other things - revhead.append(head) + for i, head in enumerate(source.heading): + if i == 0 and head.lower() == 'index': # Treat special case because index can also mean other things + revhead.append('index') continue - usplit = re.split(' *\(', head, maxsplit=1) - punit = '' - if len(usplit) > 1: - hfirst = usplit[0] - punit = re.sub('\)$','',usplit[1].strip()) - print "header", head, "split into", hfirst, ",", punit - else: - hfirst = head - newh = hfirst - newh, o1 = standardize_heading(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'weight') - newh, o2 = standardize_heading(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sigma') - newh, o3 = standardize_heading(obs, newh, ['i', 'idx', 'index', 'indices'], 'index') - if newh in ['t', 'temp', 'temperature']: newh = 'temperature' - if newh in ['p', 'pres', 'pressure']: newh = 'pressure' - if all([o1, o2, o3]): - obs = hfirst - if newh != hfirst: - print "header", hfirst, "renamed to", newh + newh, punit, obs = stand_head(head, obs) revhead.append(newh) if punit != '': units[newh] = punit + source.heading = revhead if 
len(set(revhead)) != len(revhead): logger.error('Column headings : ' + str(revhead) + '\n') @@ -405,28 +432,94 @@ def standardize_heading(obs, head, abbrevs, standard_abbrev): uqidx = [] saveidx = '' index = [] + snum = 0 + drows = [] # thisidx = Index that is built from the current row (may be empty) # saveidx = Index that may have been saved from a previous row - for row in source.table: + # snum = Subindex number + # List of (index, heading) tuples which contain file references. + fref = OrderedDict() + for rn, row in enumerate(source.table): + this_insert = [] + # crow = row[1:] thisidx = row[0] if thisidx != '': saveidx = thisidx + snum = 0 if saveidx in uqidx: logger.error('Index %s is duplicated in data table\n' % i) raise RuntimeError uqidx.append(saveidx) - index.append(saveidx) + index.append((saveidx, snum)) if saveidx == '': logger.error('Row of data : ' + str(row) + '\n') logger.error('\x1b[91mThis row does not have an index!\x1b[0m\n') raise RuntimeError + snum += 1 + if any([':' in fld for fld in row[1:]]): + # Here we insert rows from another data table. 
+ obs2 = '' + for cid_, fld in enumerate(row[1:]): + if ':' not in fld: continue + cid = cid_ + 1 + def reffld_error(reason=''): + logger.error('Row: : ' + ' '.join(row) + '\n') + logger.error('Entry : ' + fld + '\n') + logger.error('This filename:column reference is not valid!%s' % + (' (%s)' % reason if reason != '' else '')) + raise RuntimeError + if len(fld.split(':')) != 2: + reffld_error('Wrong number of colon-separated fields') + if not isint(fld.split(':')[1]): + reffld_error('Must be an integer after the colon') + fnm = fld.split(':')[0] + fcol_ = int(fld.split(':')[1]) + fpath = os.path.join(os.path.split(srcfnm)[0], fnm) + if not os.path.exists(fpath): + reffld_error('%s does not exist' % fpath) + if (saveidx, revhead[cid]) in fref: + reffld_error('%s already contains a file reference' % (saveidx, revhead[cid])) + subfile = parse1(fpath) + fcol = fcol_ - 1 + head2, punit2, obs2 = stand_head(subfile.heading[fcol], obs2) + if revhead[cid] != head2: + reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) + fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] + + for (saveidx, head), newcol in fref.items(): + inum = 0 + for irow in range(len(source.table)): + if index[irow][0] != saveidx: continue + lrow = irow + cidx = revhead.index(head) + source.table[irow][cidx] = newcol[inum] + inum += 1 + if inum >= len(newcol): break + for inum1 in range(inum, len(newcol)): + lrow += 1 + nrow = ['' for i in range(len(revhead))] + nrow[cidx] = newcol[inum] + print "Inserting", nrow, "after row", lrow + source.table.insert(lrow, nrow) + index.insert(lrow, (saveidx, inum1)) + + # for irow in range( + # for irow1 in range(max(0, len(newcol)-inum)) - self.Data = pd.DataFrame([]) + for rn, row in enumerate(source.table): + drows.append([i if i != '' else np.nan for i in row[1:]]) + + print revhead[1:] + for rn, row in enumerate(drows): + print index[rn], row + + self.Data = pd.DataFrame(drows, 
columns=revhead[1:], index=index) + print repr(self.Data) - # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_headings if row[i] != '']) for row in source.table]) + # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) - # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_headings)])) + # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) # print self.Data.__repr__ raw_input() diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt new file mode 100644 index 000000000..ca440c7e5 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 scd323.txt:2 C34 scd323.txt:4 1 58 1 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +60C 333.15 1 0.65 1 C15 scd333.txt:2 C34 scd333.txt:4 0 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +65C 338.15 1 0.671 1 C15 scd338.txt:2 C34 scd338.txt:4 1 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +80C 353.15 1 0.719 1 C15 scd353.txt:2 C34 scd353.txt:4 1 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt new file mode 
100644 index 000000000..57c1cfa5b --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.198144 C36 0.198144 +C18 0.198128 C37 0.198128 +C19 0.198111 C38 0.198111 +C20 0.198095 C39 0.198095 +C21 0.198079 C40 0.198079 +C22 0.197799 C41 0.197537 +C23 0.198045 C42 0.198046 +C24 0.178844 C43 0.178844 +C25 0.167527 C44 0.178565 +C26 0.148851 C45 0.16751 +C27 0.134117 C46 0.148834 +C28 0.119646 C47 0.1341 +C29 0.100969 C48 0.110956 +C30 0.07546 C49 0.087549 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff 
--git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt new file mode 100644 index 000000000..31434af01 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.162535 C36 0.162535 +C18 0.162817 C37 0.162817 +C19 0.162535 C38 0.162535 +C20 0.162535 C39 0.162535 +C21 0.162817 C40 0.162817 +C22 0.151268 C41 0.151268 +C23 0.142254 C42 0.142254 +C24 0.127606 C43 0.127606 +C25 0.117465 C44 0.117465 +C26 0.101972 C45 0.117183 +C27 0.092676 C46 0.102535 +C28 0.081408 C47 0.092676 +C29 0.068732 C48 0.073239 +C30 0.051267 C49 0.056901 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt index 8ba35c2d9..8d97a0bcc 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt @@ -1,5 +1,5 @@ -T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 -333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 -338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 -353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +60C 333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +65C 338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +80C 353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt 
b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt index c26cf23d5..b2824acc2 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt @@ -1,68 +1,67 @@ metadata = 'Mao' - T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -333.15 1 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -338.15 1 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -353.15 1 0.719 1 C15 C34 1 58 0 10 - C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 
C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 - +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic + 50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 + 60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 + 65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 + 80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git 
a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt index f2bbb57e1..9aadee124 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt @@ -1,67 +1,67 @@ metadata = 'Mao' -T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -333.15 1 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -338.15 1 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -353.15 1 0.719 1 C15 C34 1 58 0 10 - C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 
0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 
0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt index 0ec75d7af..de4c22f46 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt @@ -1,65 +1,65 @@ -T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 - C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - 
C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 +Index T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 
+ C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv index 847381612..354b70778 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv @@ -1,8 +1,8 @@ -"# Experimental data for liquid, bromine.",,,,, -,,,,, -Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) -298.15,3102.8,1,29.96,1,1.01325 -,,,,, -# Variables: Denominators and weights for quantities,,,,, -Denoms,=,30,0.3,, -Weights,=,1.0,1.0,, +"# Experimental data for liquid, bromine.",,,,,, +,,,,,, +Index,Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) +298.15K-1.0atm,298.15,3102.8,1,29.96,1,1.01325 +,,,,,, +# Variables: Denominators and weights for quantities,,,,,, +Denoms,=,30,0.3,,, +Weights,=,1.0,1.0,,, diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt index 333f48bbb..155adc470 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt @@ -1,7 +1,7 @@ "# Experimental data for liquid, bromine." 
-Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) -298.15 3102.8 1 29.96 1 1.01325 +Index Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) +0 298.15 3102.8 1 29.96 1 1.01325 # Variables: Denominators and weights for quantities Denoms = 30 0.3 diff --git a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo_liquid_bromine/test_parse.in index 28c854b1d..858f4e6ab 100644 --- a/studies/004_thermo_liquid_bromine/test_parse.in +++ b/studies/004_thermo_liquid_bromine/test_parse.in @@ -143,3 +143,14 @@ md_steps 100000 eq_steps 50000 $end +$target +name Lipid_HARD +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + From e0c37850f4093b5f254155b4391ceb65bfde9049 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 6 Apr 2014 23:06:38 -0700 Subject: [PATCH 05/25] Added unit test for data file parsing (lipid) --- src/thermo.py | 7 +- .../forcefield/about.txt | 0 .../forcefield/bro.itp | 0 .../forcefield/bro.orig.itp | 0 .../optimize.in | 0 .../single.in | 0 .../targets/Lipid_HARD/lipidcol1.txt | 0 .../targets/Lipid_HARD/scd323.txt | 0 .../targets/Lipid_HARD/scd333.txt | 0 .../targets/Lipid_HARD/scd338.txt | 0 .../targets/Lipid_HARD/scd353.txt | 0 .../targets/Lipid_MUL/lipidcol2a.txt | 0 .../targets/Lipid_MUL/scd323.txt | 0 .../targets/Lipid_MUL/scd333.txt | 0 .../targets/Lipid_MUL/scd338.txt | 0 .../targets/Lipid_MUL/scd353.txt | 0 .../targets/Lipid_RIT/lipidcol1.txt | 0 .../targets/Lipid_SPC/lipidcol1.txt | 0 .../targets/Lipid_TAB/lipidcol1.txt | 65 +++++++++++++++++++ .../targets/LiquidBromine/1/sim1.gro | 0 .../targets/LiquidBromine/1/sim1.mdp | 0 .../targets/LiquidBromine/1/sim1.top | 0 .../targets/LiquidBromine/1/sim2.gro | 0 .../targets/LiquidBromine/1/sim2.mdp | 0 .../targets/LiquidBromine/1/sim2.top | 0 .../targets/LiquidBromine/about.txt | 0 .../targets/LiquidBromine/data.csv | 0 .../targets/LiquidBromine/expset.txt | 0 
.../targets/LiquidBromine_CSV/data.csv | 0 .../targets/LiquidBromine_TAB/data.tab.txt | 0 .../test_parse.in | 0 .../targets/Lipid_TAB/lipidcol1.txt | 65 ------------------- test/test_system.py | 2 +- test/test_thermo.py | 53 +++++++++++++++ 34 files changed, 123 insertions(+), 69 deletions(-) rename studies/{004_thermo_liquid_bromine => 004_thermo}/forcefield/about.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/forcefield/bro.itp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/forcefield/bro.orig.itp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/optimize.in (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/single.in (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/lipidcol1.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd323.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd333.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd338.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd353.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/lipidcol2a.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd323.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd333.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd338.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd353.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_RIT/lipidcol1.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_SPC/lipidcol1.txt (100%) create mode 100644 studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim1.gro 
(100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim1.mdp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim1.top (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim2.gro (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim2.mdp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim2.top (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/about.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/data.csv (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/expset.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine_CSV/data.csv (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine_TAB/data.tab.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/test_parse.in (100%) delete mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt create mode 100644 test/test_thermo.py diff --git a/src/thermo.py b/src/thermo.py index d2eb6ee0d..2579f4059 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -498,7 +498,7 @@ def reffld_error(reason=''): for inum1 in range(inum, len(newcol)): lrow += 1 nrow = ['' for i in range(len(revhead))] - nrow[cidx] = newcol[inum] + nrow[cidx] = newcol[inum1] print "Inserting", nrow, "after row", lrow source.table.insert(lrow, nrow) index.insert(lrow, (saveidx, inum1)) @@ -521,10 +521,11 @@ def reffld_error(reason=''): # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) # print self.Data.__repr__ - raw_input() - + # raw_input() return + # return + fp = open(expdata) line = fp.readline() diff --git a/studies/004_thermo_liquid_bromine/forcefield/about.txt 
b/studies/004_thermo/forcefield/about.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/about.txt rename to studies/004_thermo/forcefield/about.txt diff --git a/studies/004_thermo_liquid_bromine/forcefield/bro.itp b/studies/004_thermo/forcefield/bro.itp similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/bro.itp rename to studies/004_thermo/forcefield/bro.itp diff --git a/studies/004_thermo_liquid_bromine/forcefield/bro.orig.itp b/studies/004_thermo/forcefield/bro.orig.itp similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/bro.orig.itp rename to studies/004_thermo/forcefield/bro.orig.itp diff --git a/studies/004_thermo_liquid_bromine/optimize.in b/studies/004_thermo/optimize.in similarity index 100% rename from studies/004_thermo_liquid_bromine/optimize.in rename to studies/004_thermo/optimize.in diff --git a/studies/004_thermo_liquid_bromine/single.in b/studies/004_thermo/single.in similarity index 100% rename from studies/004_thermo_liquid_bromine/single.in rename to studies/004_thermo/single.in diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt b/studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt b/studies/004_thermo/targets/Lipid_HARD/scd323.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt rename to studies/004_thermo/targets/Lipid_HARD/scd323.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt b/studies/004_thermo/targets/Lipid_HARD/scd333.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt rename to studies/004_thermo/targets/Lipid_HARD/scd333.txt diff --git 
a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt b/studies/004_thermo/targets/Lipid_HARD/scd338.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt rename to studies/004_thermo/targets/Lipid_HARD/scd338.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt b/studies/004_thermo/targets/Lipid_HARD/scd353.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt rename to studies/004_thermo/targets/Lipid_HARD/scd353.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt rename to studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt b/studies/004_thermo/targets/Lipid_MUL/scd323.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt rename to studies/004_thermo/targets/Lipid_MUL/scd323.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt b/studies/004_thermo/targets/Lipid_MUL/scd333.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt rename to studies/004_thermo/targets/Lipid_MUL/scd333.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt b/studies/004_thermo/targets/Lipid_MUL/scd338.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt rename to studies/004_thermo/targets/Lipid_MUL/scd338.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt b/studies/004_thermo/targets/Lipid_MUL/scd353.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt rename to studies/004_thermo/targets/Lipid_MUL/scd353.txt diff --git 
a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt b/studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt diff --git a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt new file mode 100644 index 000000000..c67aece18 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt @@ -0,0 +1,65 @@ +Index T P Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 
0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.gro b/studies/004_thermo/targets/LiquidBromine/1/sim1.gro similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.gro rename to studies/004_thermo/targets/LiquidBromine/1/sim1.gro diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.mdp b/studies/004_thermo/targets/LiquidBromine/1/sim1.mdp similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.mdp rename to studies/004_thermo/targets/LiquidBromine/1/sim1.mdp diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.top b/studies/004_thermo/targets/LiquidBromine/1/sim1.top similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.top rename to studies/004_thermo/targets/LiquidBromine/1/sim1.top diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.gro b/studies/004_thermo/targets/LiquidBromine/1/sim2.gro similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.gro rename to studies/004_thermo/targets/LiquidBromine/1/sim2.gro diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp b/studies/004_thermo/targets/LiquidBromine/1/sim2.mdp 
similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp rename to studies/004_thermo/targets/LiquidBromine/1/sim2.mdp diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.top b/studies/004_thermo/targets/LiquidBromine/1/sim2.top similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.top rename to studies/004_thermo/targets/LiquidBromine/1/sim2.top diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/about.txt b/studies/004_thermo/targets/LiquidBromine/about.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/about.txt rename to studies/004_thermo/targets/LiquidBromine/about.txt diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/data.csv b/studies/004_thermo/targets/LiquidBromine/data.csv similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/data.csv rename to studies/004_thermo/targets/LiquidBromine/data.csv diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo/targets/LiquidBromine/expset.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt rename to studies/004_thermo/targets/LiquidBromine/expset.txt diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo/targets/LiquidBromine_CSV/data.csv similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv rename to studies/004_thermo/targets/LiquidBromine_CSV/data.csv diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt rename to studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt diff --git 
a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo/test_parse.in similarity index 100% rename from studies/004_thermo_liquid_bromine/test_parse.in rename to studies/004_thermo/test_parse.in diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt deleted file mode 100644 index de4c22f46..000000000 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt +++ /dev/null @@ -1,65 +0,0 @@ -Index T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -50C 323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -60C 333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -65C 338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -80C 353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 
- C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 diff --git a/test/test_system.py b/test/test_system.py index 927e7e783..9f87b9cd4 100644 --- a/test/test_system.py +++ b/test/test_system.py @@ -179,7 +179,7 @@ def runTest(self): class TestThermoBromineStudy(ForceBalanceTestCase): def setUp(self): super(ForceBalanceTestCase,self).setUp() - os.chdir('studies/004_thermo_liquid_bromine') + os.chdir('studies/004_thermo') def tearDown(self): os.system('rm -rf results *.bak *.tmp') diff --git a/test/test_thermo.py b/test/test_thermo.py new file mode 100644 index 000000000..9baa65e1b --- /dev/null +++ b/test/test_thermo.py @@ -0,0 +1,53 @@ +import unittest +import sys, os, re +import forcebalance +import abc +import numpy +from __init__ import ForceBalanceTestCase +from collections import defaultdict, OrderedDict + +class TestParser(ForceBalanceTestCase): + def setUp(self): + os.chdir(os.path.join(os.getcwd(), 'studies', '004_thermo')) + input_file='test_parse.in' + options, tgt_opts = forcebalance.parser.parse_inputs(input_file) + forcefield = forcebalance.forcefield.FF(options) + self.objective = forcebalance.objective.Objective(options, tgt_opts, forcefield) + + def test_lipid_parser(self): + """Test for equality amongst multiple ways to parse lipid experimental data""" + # Build a dictionary of target name : dataframes + lipid_data = OrderedDict() + for tgt in self.objective.Targets: + if 'lipid' in tgt.name.lower(): + lipid_data[tgt.name] = tgt.Data + # Double loop over different targets + for i, ikey in enumerate(lipid_data.keys()): + for j, jkey in enumerate(lipid_data.keys()): + # Check column headings and row indices 
+ self.assertTrue(all(lipid_data[ikey].columns == lipid_data[jkey].columns), msg='\nColumn headings not equal for %s and %s' % (ikey, jkey)) + self.assertTrue(all(lipid_data[ikey].index == lipid_data[jkey].index), msg='\nRow indices not equal for %s and %s' % (ikey, jkey)) + # Make dictionary representation of dataframes + dicti = lipid_data[ikey].to_dict() + dictj = lipid_data[jkey].to_dict() + # Here's where it gets complicated. + # Loop over data columns. + for column in dicti.keys(): + dseti = defaultdict(set) + dsetj = defaultdict(set) + # For each data column, the dataframe contains a + # set of data which is keyed by the system index. + # Each row is further keyed by the subindex, but + # this test assumes that the subindices are + # irrelevant (equivalent to saying the ordering of + # rows - or the relative vertical position of data + # in cells across columns - is not important. Not + # entirely true but anyway...) + for idx in dicti[column].keys(): + dseti[idx[0]].add(dicti[column][idx]) + dsetj[idx[0]].add(dictj[column][idx]) + self.assertEqual(dseti, dsetj, msg='\n%s data column not equal for targets %s and %s' % (i, ikey, jkey)) + + +if __name__ == '__main__': + unittest.main() From 3d1f598a5adf32453be35cc5dde8bea82077bc39 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 6 Apr 2014 23:32:11 -0700 Subject: [PATCH 06/25] Clean up --- src/thermo.py | 195 +++++++++++------- .../{Lipid_HARD => Lipid_MIX}/lipidcol1.txt | 0 .../targets/Lipid_MIX/lipidcol2a.txt | 5 + .../{Lipid_HARD => Lipid_MIX}/scd323.txt | 0 .../{Lipid_HARD => Lipid_MIX}/scd333.txt | 0 .../{Lipid_HARD => Lipid_MIX}/scd338.txt | 0 .../{Lipid_HARD => Lipid_MIX}/scd353.txt | 0 studies/004_thermo/test_parse.in | 2 +- 8 files changed, 125 insertions(+), 77 deletions(-) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/lipidcol1.txt (100%) create mode 100644 studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd323.txt 
(100%) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd333.txt (100%) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd338.txt (100%) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd353.txt (100%) diff --git a/src/thermo.py b/src/thermo.py index 2579f4059..2df195d26 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -63,11 +63,11 @@ def parse(self): self.fields = fields # Skip over empty lines or comment lines. if self.is_empty_line(): - logger.info("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) self.ln += 1 continue if self.is_comment_line(): - logger.info("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) self.ln += 1 continue # Indicates metadata mode. @@ -96,19 +96,20 @@ def parse(self): meta[mkey].append(fld) # Set field start, field end, and field content for the header. if is_header: - logger.info("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) self.process_header() elif is_meta: - logger.info("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) else: # Build the row of data to be appended to the table. # Loop through the fields in the header and inserts fields # in the data line accordingly. Ignores trailing tabs/spaces. 
- logger.info("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) table.append(self.process_data()) self.ln += 1 self.sanity_check() - printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) + if logger.level == DEBUG: + printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) self.metadata = meta self.table = table @@ -330,7 +331,7 @@ def stand_head(head, obs): if len(usplit) > 1: hfirst = usplit[0] punit = re.sub('\)$','',usplit[1].strip()) - print "header", head, "split into", hfirst, ",", punit + logger.debug("header %s split into %s, %s" % (head, hfirst, punit)) else: hfirst = head newh = hfirst @@ -342,13 +343,14 @@ def stand_head(head, obs): if all([o1, o2, o3]): obs = newh if newh != hfirst: - print "header", hfirst, "renamed to", newh + logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs class Thermo(Target): """ - A target for fitting general experimental data sets. The - source data is described in a .txt file. + A target for fitting general experimental data sets. The source + data is described in a text file formatted according to the + Specification. 
""" def __init__(self, options, tgt_opts, forcefield): @@ -402,7 +404,6 @@ def read_source(self, srcfnm): logger.info('Parsing source file %s\n' % srcfnm) source = parse1(srcfnm) printcool_dictionary(source.metadata, title="Metadata") - # print source.table revhead = [] obs = '' @@ -424,7 +425,6 @@ def read_source(self, srcfnm): logger.error('\x1b[91mColumn headings are not unique!\x1b[0m\n') raise RuntimeError - print revhead if revhead[0] != 'index': logger.error('\x1b[91mIndex column heading is not present\x1b[0m\n(Add an Index column on the left!)\n') raise RuntimeError @@ -441,7 +441,6 @@ def read_source(self, srcfnm): fref = OrderedDict() for rn, row in enumerate(source.table): this_insert = [] - # crow = row[1:] thisidx = row[0] if thisidx != '': saveidx = thisidx @@ -457,7 +456,14 @@ def read_source(self, srcfnm): raise RuntimeError snum += 1 if any([':' in fld for fld in row[1:]]): - # Here we insert rows from another data table. + # Here we read rows from another data table. + # Other files may be referenced in the cell of a primary + # table using filename:column_number (numbered from 1). + # Rules: (1) No matter where the filename appears in the column, + # the column is inserted at the beginning of the system index. + # (2) There can only be one file per system index / column. + # (3) The column heading in the secondary file that's being + # referenced must match that of the reference in the primary file. obs2 = '' for cid_, fld in enumerate(row[1:]): if ':' not in fld: continue @@ -486,6 +492,8 @@ def reffld_error(reason=''): reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] + # Insert the file-referenced data tables appropriately into + # our main data table. 
for (saveidx, head), newcol in fref.items(): inum = 0 for irow in range(len(source.table)): @@ -499,81 +507,116 @@ def reffld_error(reason=''): lrow += 1 nrow = ['' for i in range(len(revhead))] nrow[cidx] = newcol[inum1] - print "Inserting", nrow, "after row", lrow source.table.insert(lrow, nrow) index.insert(lrow, (saveidx, inum1)) - - # for irow in range( - # for irow1 in range(max(0, len(newcol)-inum)) for rn, row in enumerate(source.table): drows.append([i if i != '' else np.nan for i in row[1:]]) - print revhead[1:] - for rn, row in enumerate(drows): - print index[rn], row - + # Turn it into a pandas DataFrame. self.Data = pd.DataFrame(drows, columns=revhead[1:], index=index) - print repr(self.Data) + return - # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) + def launch_simulation(self, index, simname): + """ - # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) - # print self.Data.__repr__ - # raw_input() - return + Launch a simulation - either locally or via the Work Queue. 
+ This function is intended to be run within the folder: + target_name/iteration_number/system_index/simulation_name/initial_condition OR + target_name/iteration_number/system_index/simulation_name + + """ + + wq = getWorkQueue() + if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): + link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) + self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] + self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) + cmdstr = '%s python npt.py %s %.3f %.3f' % (self.nptpfx, self.engname, temperature, pressure) + if wq == None: + logger.info("Running condensed phase simulation locally.\n") + logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) + _exec(cmdstr, copy_stderr=True, outfnm='npt.out') + else: + queue_up(wq, command = cmdstr+' &> npt.out', + input_files = self.nptfiles + self.scripts + ['forcebalance.p'], + output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) + + # NAMES FOR OBJECTS! - # return + # Timeseries: Time series of an instantaneous observable that is + # returned by the MD simulation. - fp = open(expdata) + # Observable: A thermodynamic property which can be compared to + # experiment and possesses methods for calculating the property + # and its derivatives. + + # State? Point? What should this be called?? 
+ + # # print revhead[1:] + # # for rn, row in enumerate(drows): + # # print index[rn], row + + # # print repr(self.Data) + + # # # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) + + + # # # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) + # # # print self.Data.__repr__ + # # # raw_input() + + # # return + + # fp = open(expdata) - line = fp.readline() - foundHeader = False - names = None - units = None - label_header = None - label_unit = None - count = 0 - metadata = {} - while line: - # Skip comments and blank lines - if line.lstrip().startswith("#") or not line.strip(): - line = fp.readline() - continue - # Metadata is denoted using - if "=" in line: # Read variable - param, value = line.split("=") - param = param.strip().lower() - metadata[param] = value - # if param == "denoms": - # for e, v in enumerate(value.split()): - # self.denoms[self.quantities[e]] = float(v) - # elif param == "weights": - # for e, v in enumerate(value.split()): - # self.weights[self.quantities[e]] = float(v) - elif foundHeader: # Read exp data - count += 1 - vals = line.split() - label = (vals[0], label_header, label_unit) - refs = np.array(vals[1:-2:2]).astype(float) - wts = np.array(vals[2:-2:2]).astype(float) - temperature = float(vals[-2]) - pressure = None if vals[-1].lower() == "none" else \ - float(vals[-1]) - dp = Point(count, label=label, refs=refs, weights=wts, - names=names, units=units, - temperature=temperature, pressure=pressure) - self.points.append(dp) - else: # Read headers - foundHeader = True - headers = zip(*[tuple(h.split("_")) for h in line.split() - if h != "w"]) - label_header = list(headers[0])[0] - label_unit = list(headers[1])[0] - names = list(headers[0][1:-2]) - units = list(headers[1][1:-2]) - line = fp.readline() + # line = fp.readline() + # foundHeader = False + # names = None + # units = None + # label_header = None + # 
label_unit = None + # count = 0 + # metadata = {} + # while line: + # # Skip comments and blank lines + # if line.lstrip().startswith("#") or not line.strip(): + # line = fp.readline() + # continue + # # Metadata is denoted using + # if "=" in line: # Read variable + # param, value = line.split("=") + # param = param.strip().lower() + # metadata[param] = value + # # if param == "denoms": + # # for e, v in enumerate(value.split()): + # # self.denoms[self.quantities[e]] = float(v) + # # elif param == "weights": + # # for e, v in enumerate(value.split()): + # # self.weights[self.quantities[e]] = float(v) + # elif foundHeader: # Read exp data + # count += 1 + # vals = line.split() + # label = (vals[0], label_header, label_unit) + # refs = np.array(vals[1:-2:2]).astype(float) + # wts = np.array(vals[2:-2:2]).astype(float) + # temperature = float(vals[-2]) + # pressure = None if vals[-1].lower() == "none" else \ + # float(vals[-1]) + # dp = Point(count, label=label, refs=refs, weights=wts, + # names=names, units=units, + # temperature=temperature, pressure=pressure) + # self.points.append(dp) + # else: # Read headers + # foundHeader = True + # headers = zip(*[tuple(h.split("_")) for h in line.split() + # if h != "w"]) + # label_header = list(headers[0])[0] + # label_unit = list(headers[1])[0] + # names = list(headers[0][1:-2]) + # units = list(headers[1][1:-2]) + # line = fp.readline() def retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated diff --git a/studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt b/studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt diff --git a/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt b/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt new file mode 100644 index 000000000..8d97a0bcc --- /dev/null +++ 
b/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt @@ -0,0 +1,5 @@ +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +60C 333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +65C 338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +80C 353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo/targets/Lipid_HARD/scd323.txt b/studies/004_thermo/targets/Lipid_MIX/scd323.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd323.txt rename to studies/004_thermo/targets/Lipid_MIX/scd323.txt diff --git a/studies/004_thermo/targets/Lipid_HARD/scd333.txt b/studies/004_thermo/targets/Lipid_MIX/scd333.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd333.txt rename to studies/004_thermo/targets/Lipid_MIX/scd333.txt diff --git a/studies/004_thermo/targets/Lipid_HARD/scd338.txt b/studies/004_thermo/targets/Lipid_MIX/scd338.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd338.txt rename to studies/004_thermo/targets/Lipid_MIX/scd338.txt diff --git a/studies/004_thermo/targets/Lipid_HARD/scd353.txt b/studies/004_thermo/targets/Lipid_MIX/scd353.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd353.txt rename to studies/004_thermo/targets/Lipid_MIX/scd353.txt diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in index 858f4e6ab..9a5503e68 100644 --- a/studies/004_thermo/test_parse.in +++ b/studies/004_thermo/test_parse.in @@ -144,7 +144,7 @@ eq_steps 50000 $end $target -name Lipid_HARD +name Lipid_MIX type Thermo_GMX weight 1.0 source lipidcol1.txt From 66f82df9fb4146ff719d387bbf786920a2a8c57a Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 7 Apr 2014 06:49:46 -0700 Subject: [PATCH 07/25] Create list of Ensembles 
and table of Observable objects --- src/{quantity.py => observable.py} | 87 +++++++++------- src/parser.py | 5 +- src/thermo.py | 154 ++++++++++++++++++++++------- studies/004_thermo/test_parse.in | 16 +-- 4 files changed, 179 insertions(+), 83 deletions(-) rename src/{quantity.py => observable.py} (81%) diff --git a/src/quantity.py b/src/observable.py similarity index 81% rename from src/quantity.py rename to src/observable.py index 44b137068..4a2e96ebb 100644 --- a/src/quantity.py +++ b/src/observable.py @@ -63,44 +63,49 @@ def energy_driver(mvals_): G[i,:] = EDG[:] return G -class Quantity(object): +class Observable(object): """ - Base class for thermodynamical quantity used for fitting. This can + Base class for thermodynamical observable used for fitting. This can be any experimental data that can be calculated as an ensemble average from a simulation. Data attributes --------------- name : string - Identifier for the quantity that is specified in `quantities` in Target + Identifier for the observable that is specified in `observables` in Target options. - engname : string - Use this engine to extract the quantity from the simulation results. - At present, only `gromacs` is supported. - temperature : float - Calculate the quantity at this temperature (in K). - pressure : float - Calculate the quantity at this pressure (in bar). - + sreq : list of (strings or lists) + The names of simulation types that are needed to calculate + this observable. If a string is added to the list, then that + simulation is *required* to calculate the observable. If a + list is added, then any simulation within that type is allowed, + but the first member of the list is used by default. + treq : list of strings + The names of timeseries from each simulation that are needed to + calculate this observable. + dreq : list of strings + The names of timeseries from each simulation - in addition to + "treq" - that are needed to *differentiate* this observable. 
+ (Usually energy derivatives) """ - def __init__(self, engname, temperature, pressure, name=None): + def __init__(self, source, name=None): self.name = name if name is not None else "empty" - self.engname = engname - self.temperature = temperature - self.pressure = pressure + self.sreq = [] + self.treq = [] + self.dreq = ['energy_derivatives'] def __str__(self): - return "quantity is " + self.name.capitalize() + "." + return "observable is " + self.name.capitalize() + "." def extract(self, engines, FF, mvals, h, AGrad=True): - """Calculate and extract the quantity from MD results. How this is done - depends on the quantity and the engine so this must be + """Calculate and extract the observable from MD results. How this is done + depends on the observable and the engine so this must be implemented in the subclass. Parameters ---------- engines : list - A list of Engine objects that are requred to calculate the quantity. + A list of Engine objects that are requred to calculate the observable. FF : FF Force field object. mvals : list @@ -114,23 +119,29 @@ def extract(self, engines, FF, mvals, h, AGrad=True): ------- result : (float, float, np.array) The returned tuple is (Q, Qerr, Qgrad), where Q is the calculated - quantity, Qerr is the calculated standard deviation of the quantity, + observable, Qerr is the calculated standard deviation of the observable, and Qgrad is a M-array with the calculated gradients for the - quantity, with M being the number of force field parameters that are + observable, with M being the number of force field parameters that are being fitted. """ logger.error("Extract method not implemented in base class.\n") raise NotImplementedError -# class Quantity_Density -class Quantity_Density(Quantity): - def __init__(self, engname, temperature, pressure, name=None): +# class Observable_Density +class Observable_Density(Observable): + def __init__(self, source, name=None): """ Density. 
""" - super(Quantity_Density, self).__init__(engname, temperature, pressure, name) + super(Observable_Density, self).__init__(source, name) self.name = name if name is not None else "density" + # Calculating the density requires either a liquid or solid simulation. + self.sreq = [['liquid', 'solid']] + + # Requires timeseries of densities from the simulation. + self.treq = ['density'] + def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# # Physical constants and local variables. # @@ -182,9 +193,9 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): color=4, bold=True) G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) - #=======================================# - # Quantity properties and derivatives. # - #=======================================# + #=========================================# + # Observable properties and derivatives. # + #=========================================# # Average and error. Rho_avg, Rho_err = mean_stderr(Density) # Analytic first derivative. @@ -193,14 +204,20 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Rho_avg, Rho_err, Rho_grad -# class Quantity_H_vap -class Quantity_H_vap(Quantity): - def __init__(self, engname, temperature, pressure, name=None): +# class Observable_H_vap +class Observable_H_vap(Observable): + def __init__(self, source, name=None): """ Enthalpy of vaporization. """ - super(Quantity_H_vap, self).__init__(engname, temperature, pressure, name) + super(Observable_H_vap, self).__init__(source, name) self.name = name if name is not None else "H_vap" + # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. + self.sreq = ['liquid', 'gas'] + + # Requires timeseries of energies and volumes from the simulation. 
+ self.treq = ['energy', 'volume'] + def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# # Physical constants and local variables. # @@ -274,9 +291,9 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) Gm = energy_derivatives(engines[1], FF, mvals, h, pgrad, len(mEnergy), AGrad) - #=======================================# - # Quantity properties and derivatives. # - #=======================================# + #=========================================# + # Observable properties and derivatives. # + #=========================================# # Average and error. E_avg, E_err = mean_stderr(Energy) Em_avg, Em_err = mean_stderr(mEnergy) diff --git a/src/parser.py b/src/parser.py index 40b90a8df..340b45723 100644 --- a/src/parser.py +++ b/src/parser.py @@ -157,7 +157,7 @@ "engine" : (None, 180, 'The external code used to execute the simulations (GMX, TINKER, AMBER, OpenMM)', 'All targets (important)', '') }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), - "quantities" : ([], 100, 'List of quantities to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "observables" : ([], 100, 'List of observables to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -174,7 +174,7 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 
0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate quantities.', 'Thermodynamic property targets', 'thermo'), + "n_sim_chain" : (1, 0, 'Number of simulations required to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), @@ -285,6 +285,7 @@ "lipid_prod_steps" : "lipid_md_steps", "lipid_equ_steps" : "lipid_eq_steps", "expdata_txt" : "source", + "quantities" : "observables", } ## Listing of sections in the input file. diff --git a/src/thermo.py b/src/thermo.py index 2df195d26..66c8115d0 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -7,6 +7,7 @@ import itertools import cStringIO +from forcebalance.observable import * from forcebalance.target import Target from forcebalance.finite_difference import in_fd from forcebalance.nifty import flat, col, row, isint @@ -346,6 +347,17 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs +def determine_needed_simulations(observables): + + """ Given a list of Observable objects, determine the list of + simulations that are needed to calculate all of them. """ + + sreqs = OrderedDict() + for obs in observables: + sreqs[obs.name] = obs.sreq[:] + print sreqs + + class Thermo(Target): """ A target for fitting general experimental data sets. 
The source @@ -360,8 +372,8 @@ def __init__(self, options, tgt_opts, forcefield): ## Parameters # Source data (experimental data, model parameters and weights) self.set_option(tgt_opts, "source", forceprint=True) - # Quantities to calculate - self.set_option(tgt_opts, "quantities", forceprint=True) + # Observables to calculate + self.set_option(tgt_opts, "observables", "observable_names", forceprint=True) # Length of simulation chain self.set_option(tgt_opts, "n_sim_chain", forceprint=True) # Number of time steps in the equilibration run @@ -372,21 +384,30 @@ def __init__(self, options, tgt_opts, forcefield): ## Variables # Prefix names for simulation data self.simpfx = "sim" - # Data points for quantities + # Data points for observables self.points = [] - # Denominators for quantities + # Denominators for observables self.denoms = {} - # Weights for quantities + # Weights for observables self.weights = {} - ## Read source data and initialize points + ## A mapping that takes us from observable names to Observable objects. + self.Observable_Map = {'density' : Observable_Density, + 'rho' : Observable_Density, + 'hvap' : Observable_H_vap, + 'h_vap' : Observable_H_vap} + + ## Read source data and initialize points; creates self.Data, self.Ensembles and self.Observables objects. self.read_source(os.path.join(self.root, self.tgtdir, self.source)) ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - + + ## Set up simulations + self.prepare_simulations() + def read_source(self, srcfnm): """Read and store source data. 
@@ -406,7 +427,7 @@ def read_source(self, srcfnm): printcool_dictionary(source.metadata, title="Metadata") revhead = [] obs = '' - + obsnames = [] units = defaultdict(str) @@ -415,6 +436,7 @@ def read_source(self, srcfnm): revhead.append('index') continue newh, punit, obs = stand_head(head, obs) + if obs not in obsnames + ['temp', 'pres', 'n_ic']: obsnames.append(obs) revhead.append(newh) if punit != '': units[newh] = punit @@ -514,7 +536,63 @@ def reffld_error(reason=''): drows.append([i if i != '' else np.nan for i in row[1:]]) # Turn it into a pandas DataFrame. - self.Data = pd.DataFrame(drows, columns=revhead[1:], index=index) + self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['ensemble', 'subindex'])) + + # A list of ensembles (i.e. top-level indices) which correspond + # to sets of simulations that we'll be running. + self.Ensembles = [] + for idx in self.Data.index: + if idx[0] not in self.Ensembles: + self.Ensembles.append(idx[0]) + + # A list of Observable objects (i.e. column headings) which + # contain methods for calculating observables that we need. + # Think about: + # (1) How much variability is allowed across Ensembles? + # For instance, different S_cd is permissible. 
+ self.Observables = OrderedDict() + for obsname in [stand_head(i, '')[2] for i in self.observable_names]: + if obsname in self.Observables: + logger.error('%s was already specified as an observable' % (obsname)) + self.Observables[obsname] = OrderedDict() + for ie, ensemble in enumerate(self.Ensembles): + if obsname in self.Observable_Map: + newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) + logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) + self.Observables[obsname][ensemble] = newobs + else: + logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) + self.Observables[obsname][ensemble] = Observable(name=obsname, source=self.Data.ix[ensemble]) + + # for ensemble in self.Ensembles: + # self.Observables[ensemble] = [] + # for obsname in obsnames: + # for ensemble, ie in enumerate(self.Ensembles): + # if obsname in self.Observable_Map: + # newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) + # if newobs.name in [obs.name for obs in self.Observables[ensemble]]: + # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) + # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) + # self.Observables[ensemble].append(newobs) + # else: + # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) + # self.Observables[ensemble].append(Observable(name=obsname, source=self.Data.ix[ensemble])) + return + + def prepare_simulations(self): + + """ + + Prepare simulations to be launched. Set initial conditions + and create directories. This function is intended to be run + at the start of each optimization cycle, so that initial + conditions may be easily set. + + """ + + # The list of simulations that we'll be running. 
+ self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + return def launch_simulation(self, index, simname): @@ -591,10 +669,10 @@ def launch_simulation(self, index, simname): # metadata[param] = value # # if param == "denoms": # # for e, v in enumerate(value.split()): - # # self.denoms[self.quantities[e]] = float(v) + # # self.denoms[self.observables[e]] = float(v) # # elif param == "weights": # # for e, v in enumerate(value.split()): - # # self.weights[self.quantities[e]] = float(v) + # # self.weights[self.observables[e]] = float(v) # elif foundHeader: # Read exp data # count += 1 # vals = line.split() @@ -620,12 +698,12 @@ def launch_simulation(self, index, simname): def retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated - quantities in the Point object dp. + observables in the Point object dp. Parameters ---------- dp : Point - Store the calculated quantities in this point. + Store the calculated observables in this point. Returns ------- @@ -647,9 +725,9 @@ def retrieve(self, dp): msg = 'The file ' + abspath + ' does not exist so we cannot read it.\n' logger.warning(msg) - dp.data["values"] = np.zeros((len(self.quantities))) - dp.data["errors"] = np.zeros((len(self.quantities))) - dp.data["grads"] = np.zeros((len(self.quantities), self.FF.np)) + dp.data["values"] = np.zeros((len(self.observables))) + dp.data["errors"] = np.zeros((len(self.observables))) + dp.data["grads"] = np.zeros((len(self.observables), self.FF.np)) def submit_jobs(self, mvals, AGrad=True, AHess=True): """This routine is called by Objective.stage() and will run before "get". @@ -696,7 +774,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # Run the simulation chain for point. 
cmdstr = ("%s python md_chain.py " % self.mdpfx + - " ".join(self.quantities) + " " + + " ".join(self.observables) + " " + "--engine %s " % self.engname + "--length %d " % self.n_sim_chain + "--name %s " % self.simpfx + @@ -735,7 +813,7 @@ def print_item(key, physunit): (self.Xp[key], self.Wp[key], self.Xp[key]*self.Wp[key]))) - for i, q in enumerate(self.quantities): + for i, q in enumerate(self.observables): print_item(q, self.points[0].ref["units"][i]) PrintDict['Total'] = "% 10s % 8s % 14.5e" % ("","", self.Objective) @@ -745,14 +823,14 @@ def print_item(key, physunit): printcool_dictionary(PrintDict, color=4, title=Title, keywidth=31) return - def objective_term(self, quantity): + def objective_term(self, observable): """Calculates the contribution to the objective function (the term) for a - given quantity. + given observable. Parameters ---------- - quantity : string - Calculate the objective term for this quantity. + observable : string + Calculate the objective term for this observable. 
Returns ------- @@ -767,18 +845,18 @@ def objective_term(self, quantity): Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) - # Grab ref data for quantity - qid = self.quantities.index(quantity) + # Grab ref data for observable + qid = self.observables.index(observable) Exp = np.array([pt.ref["refs"][qid] for pt in self.points]) Weights = np.array([pt.ref["weights"][qid] for pt in self.points]) - Denom = self.denoms[quantity] + Denom = self.denoms[observable] # Renormalize weights Weights /= np.sum(Weights) logger.info("Renormalized weights to " + str(np.sum(Weights)) + "\n") - logger.info(("Physical quantity '%s' uses denominator = %g %s\n" % - (quantity.capitalize(), Denom, - self.points[0].ref["units"][self.quantities.index(quantity)]))) + logger.info(("Physical observable '%s' uses denominator = %g %s\n" % + (observable.capitalize(), Denom, + self.points[0].ref["units"][self.observables.index(observable)]))) # Grab calculated values values = np.array([pt.data["values"][qid] for pt in self.points]) @@ -814,7 +892,7 @@ def objective_term(self, quantity): GradMapPrint.append([' %8.2f %8.1f' % (temp, press)] + ["% 9.3e" % i for i in grads[pt.idnr-1]]) - o = wopen('gradient_%s.dat' % quantity) + o = wopen('gradient_%s.dat' % observable) for line in GradMapPrint: print >> o, ' '.join(line) o.close() @@ -831,7 +909,7 @@ def objective_term(self, quantity): def get(self, mvals, AGrad=True, AHess=True): """Return the contribution to the total objective function. This is a - weighted average of the calculated quantities. + weighted average of the calculated observables. 
Parameters ---------- @@ -863,16 +941,16 @@ def get(self, mvals, AGrad=True, AHess=True): obj = OrderedDict() reweighted = [] - for q in self.quantities: + for q in self.observables: # Returns dict with keys "X"=objective term value, "G"=the # gradient, "H"=the hessian, and "info"=printed info about points obj[q] = self.objective_term(q) - # Apply weights for quantities (normalized) + # Apply weights for observables (normalized) if obj[q]["X"] == 0: self.weights[q] = 0.0 - # Store weights sorted in the order of self.quantities + # Store weights sorted in the order of self.observables reweighted.append(self.weights[q]) # Normalize weights @@ -880,16 +958,16 @@ def get(self, mvals, AGrad=True, AHess=True): wtot = np.sum(reweighted) reweighted = reweighted/wtot if wtot > 0 else reweighted - # Picks out the "X", "G" and "H" keys for the quantities sorted in the - # order of self.quantities. Xs is N-array, Gs is NxM-array and Hs is - # NxMxM-array, where N is number of quantities and M is number of + # Picks out the "X", "G" and "H" keys for the observables sorted in the + # order of self.observables. Xs is N-array, Gs is NxM-array and Hs is + # NxMxM-array, where N is number of observables and M is number of # parameters. Xs = np.array([dic["X"] for dic in obj.values()]) Gs = np.array([dic["G"] for dic in obj.values()]) Hs = np.array([dic["H"] for dic in obj.values()]) # Target contribution is (normalized) weighted averages of the - # individual quantity terms. + # individual observable terms. 
Objective = np.average(Xs, weights=(None if np.all(reweighted == 0) else \ reweighted), axis=0) if AGrad: @@ -902,7 +980,7 @@ def get(self, mvals, AGrad=True, AHess=True): if not in_fd(): # Store results to show with indicator() function self.Xp = {q : dic["X"] for (q, dic) in obj.items()} - self.Wp = {q : reweighted[self.quantities.index(q)] + self.Wp = {q : reweighted[self.observables.index(q)] for (q, dic) in obj.items()} self.Pp = {q : dic["info"] for (q, dic) in obj.items()} diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in index 9a5503e68..4eb82ab64 100644 --- a/studies/004_thermo/test_parse.in +++ b/studies/004_thermo/test_parse.in @@ -71,7 +71,7 @@ name LiquidBromine type Thermo_GMX weight 1.0 source expset.txt -quantities density h_vap +observables density h_vap n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -82,7 +82,7 @@ name LiquidBromine_CSV type Thermo_GMX weight 1.0 source data.csv -quantities density h_vap +observables density h_vap n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -93,7 +93,7 @@ name LiquidBromine_TAB type Thermo_GMX weight 1.0 source data.tab.txt -quantities density h_vap +observables density h_vap n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -104,7 +104,7 @@ name Lipid_SPC type Thermo_GMX weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -115,7 +115,7 @@ name Lipid_RIT type Thermo_GMX weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -126,7 +126,7 @@ name Lipid_TAB type Thermo_GMX weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -137,7 +137,7 @@ name Lipid_MUL type Thermo_GMX weight 1.0 source lipidcol2a.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -148,7 +148,7 @@ name Lipid_MIX type Thermo_GMX 
weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 From e3676f2887e79ca1ed0d2e5d68720a415c8c9afc Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 7 Apr 2014 07:08:39 -0700 Subject: [PATCH 08/25] Map observable names to required simulations --- src/thermo.py | 66 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/src/thermo.py b/src/thermo.py index 66c8115d0..1d752e3de 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,6 +1,7 @@ import os import re import csv +import copy import errno import numpy as np import pandas as pd @@ -347,17 +348,6 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs -def determine_needed_simulations(observables): - - """ Given a list of Observable objects, determine the list of - simulations that are needed to calculate all of them. """ - - sreqs = OrderedDict() - for obs in observables: - sreqs[obs.name] = obs.sreq[:] - print sreqs - - class Thermo(Target): """ A target for fitting general experimental data sets. The source @@ -579,6 +569,57 @@ def reffld_error(reason=''): # self.Observables[ensemble].append(Observable(name=obsname, source=self.Data.ix[ensemble])) return + def determine_simulations(self): + + """ + Determine which simulations need to be run. The same + simulations are run for each ensemble in the data set. + """ + + # Determine which simulations are needed. + sreqs = OrderedDict() + for obsname in self.Observables: + sreqs[obsname] = self.Observables[obsname][self.Ensembles[0]].sreq + + def narrow(): + # Get the names of simulations that are REQUIRED to calculate the observables. + toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + # Whoa, this is a deeply nested loop. What does it do? + # First loop over the elements in "sreqs" for each observable name. 
+ # If the element is a string, then it's a required simulation name (top level). + # If the element is a list, then it's a list of valid simulation names + # and we need to narrow the list down. + # For the ones that are lists (and have any intersection with the top level), + # delete the ones that don't intersect. + sreq0 = copy.deepcopy(sreqs) + for obsname in sreqs: + for sims in sreqs[obsname]: + if type(sims) == list: + if len(sims) == 1: + sreqs[obsname] = [sims[0]] + elif any([i in sims for i in toplevel]): + for j in sims: + if j not in toplevel: sims.remove(j) + return sreqs != sreq0 + + print sreqs + while narrow(): + print sreqs + # For the leftover observables where there is still some ambiguity, + # we attempt + # To do: Figure this out from existing initial conditions maybe + for obsname in sreqs: + for sims in sreqs[obsname]: + if type(sims) == list: + for sim in sims: + if has_ic(sim): + sreqs[obsname] = [sim] + + + self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + + return + def prepare_simulations(self): """ @@ -589,7 +630,8 @@ def prepare_simulations(self): conditions may be easily set. """ - + # print narrow() + # The list of simulations that we'll be running. self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) From 2b6df36aa274cdd6f36e9c8151255091df5d402c Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Tue, 8 Apr 2014 01:33:05 -0700 Subject: [PATCH 09/25] Start modifying framework to require user input simulations. 
--- src/observable.py | 24 +- src/thermo.py | 319 +++++++++++++++--- studies/004_thermo/single.in | 2 +- .../LiquidBromine/1/{sim2.gro => gas.gro} | 0 .../LiquidBromine/1/{sim2.mdp => gas.mdp} | 0 .../LiquidBromine/1/{sim2.top => gas.top} | 0 .../LiquidBromine/1/{sim1.gro => liquid.gro} | 0 .../LiquidBromine/1/{sim1.mdp => liquid.mdp} | 0 .../LiquidBromine/1/{sim1.top => liquid.top} | 0 .../targets/LiquidBromine/expset.txt | 2 +- 10 files changed, 289 insertions(+), 58 deletions(-) rename studies/004_thermo/targets/LiquidBromine/1/{sim2.gro => gas.gro} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim2.mdp => gas.mdp} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim2.top => gas.top} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim1.gro => liquid.gro} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim1.mdp => liquid.mdp} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim1.top => liquid.top} (100%) diff --git a/src/observable.py b/src/observable.py index 4a2e96ebb..a1139105a 100644 --- a/src/observable.py +++ b/src/observable.py @@ -213,10 +213,10 @@ def __init__(self, source, name=None): self.name = name if name is not None else "H_vap" # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. - self.sreq = ['liquid', 'gas'] + self.sreq = [['liquid'], ['gas']] # Requires timeseries of energies and volumes from the simulation. - self.treq = ['energy', 'volume'] + self.treq = [['energy', 'volume'], ['energy']] def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -315,3 +315,23 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Hvap_avg, Hvap_err, Hvap_grad +# class Observable_Kappa +class Observable_Kappa(Observable): + def __init__(self, source, name=None): + """ Compressibility (applies to liquid and lipid bilayer.) 
""" + super(Observable_H_vap, self).__init__(source, name) + + self.name = name if name is not None else "H_vap" + + # List of dictionaries of simulations, keyed to timeseries to extract from the simulation. + # Each dictionary represents a simulation in a sequence, but the observable isn't mapped to a unique simulation each time. + # Because of this, we determine which simulation to launch (in the sequence) based on the available initial coordinates (or explicit user input). + # Depending on which simulation is executed, we require different timeseries from the simulation, and different formulas. + # But another way is to just define two observables ... need to think about it. + + self.sreq = [{'liquid':['volume'], 'bilayer':['al']}, + ] + + + # Requires timeseries of energies and volumes from the simulation. + self.treq = [['energy', 'volume'], ['energy']] diff --git a/src/thermo.py b/src/thermo.py index 1d752e3de..93e50e866 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -380,14 +380,19 @@ def __init__(self, options, tgt_opts, forcefield): self.denoms = {} # Weights for observables self.weights = {} + # Suffixes for coordinate files + self.crdsfx = {'gromacs':['.gro', '.pdb'], + 'tinker':['.xyz', '.arc'], + 'openmm':['.pdb']}[self.engname.lower()] ## A mapping that takes us from observable names to Observable objects. self.Observable_Map = {'density' : Observable_Density, 'rho' : Observable_Density, 'hvap' : Observable_H_vap, 'h_vap' : Observable_H_vap} + - ## Read source data and initialize points; creates self.Data, self.Ensembles and self.Observables objects. + ## Read source data and initialize points; creates self.Data, self.Indices and self.Observables objects. 
self.read_source(os.path.join(self.root, self.tgtdir, self.source)) ## Copy run scripts from ForceBalance installation directory @@ -396,7 +401,7 @@ def __init__(self, options, tgt_opts, forcefield): os.path.join(self.root, self.tempdir, f)) ## Set up simulations - self.prepare_simulations() + #self.determine_simulations() def read_source(self, srcfnm): """Read and store source data. @@ -526,97 +531,269 @@ def reffld_error(reason=''): drows.append([i if i != '' else np.nan for i in row[1:]]) # Turn it into a pandas DataFrame. - self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['ensemble', 'subindex'])) + self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) - # A list of ensembles (i.e. top-level indices) which correspond + # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. - self.Ensembles = [] + self.Indices = [] for idx in self.Data.index: - if idx[0] not in self.Ensembles: - self.Ensembles.append(idx[0]) + if idx[0] not in self.Indices: + self.Indices.append(idx[0]) # A list of Observable objects (i.e. column headings) which # contain methods for calculating observables that we need. # Think about: - # (1) How much variability is allowed across Ensembles? + # (1) How much variability is allowed across Indices? # For instance, different S_cd is permissible. 
self.Observables = OrderedDict() for obsname in [stand_head(i, '')[2] for i in self.observable_names]: if obsname in self.Observables: logger.error('%s was already specified as an observable' % (obsname)) self.Observables[obsname] = OrderedDict() - for ie, ensemble in enumerate(self.Ensembles): + for ie, index in enumerate(self.Indices): if obsname in self.Observable_Map: - newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) + newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - self.Observables[obsname][ensemble] = newobs + self.Observables[obsname][index] = newobs else: logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - self.Observables[obsname][ensemble] = Observable(name=obsname, source=self.Data.ix[ensemble]) - - # for ensemble in self.Ensembles: - # self.Observables[ensemble] = [] + self.Observables[obsname][index] = Observable(name=obsname, source=self.Data.ix[index]) + + print self.Indices + print self.Observables + print repr(self.Data) + raw_input() + # for index in self.Indices: + # self.Observables[index] = [] # for obsname in obsnames: - # for ensemble, ie in enumerate(self.Ensembles): + # for index, ie in enumerate(self.Indices): # if obsname in self.Observable_Map: - # newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) - # if newobs.name in [obs.name for obs in self.Observables[ensemble]]: + # newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) + # if newobs.name in [obs.name for obs in self.Observables[index]]: # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - # self.Observables[ensemble].append(newobs) + # self.Observables[index].append(newobs) # 
else: # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - # self.Observables[ensemble].append(Observable(name=obsname, source=self.Data.ix[ensemble])) + # self.Observables[index].append(Observable(name=obsname, source=self.Data.ix[index])) return + def find_ic(self, index, stype, icn): + """ + Search for a suitable initial condition file. + + Initial condition files will be searched for in the following priority: + targets/target_name/index/stype/ICs/stype_#.xyz + targets/target_name/index/stype/ICs/stype#.xyz + targets/target_name/index/stype/ICs/#.xyz + targets/target_name/index/stype/ICs/stype.xyz + targets/target_name/index/stype/ICs/coords.xyz + targets/target_name/index/stype/stype.xyz + targets/target_name/index/stype/coords.xyz + targets/target_name/index/stype.xyz + targets/target_name/stype.xyz + """ + found = '' + basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', stype), False), + (os.path.join(index, stype, 'ICs', 'coords'), False), + (os.path.join(index, stype, stype), False), + (os.path.join(index, stype, 'coords'), False), + (os.path.join(index, stype), False), + (os.path.join(stype), False)] + paths = [] + for fnm, numbered in basefnms: + for crdsfx in self.crdsfx: + fpath = os.path.join(self.tgtdir, fnm+crdsfx) + paths.append(fpath) + if os.path.exists(fpath): + if found != '': + logger.info('Target %s Index %s Simulation %s : ' + '%s overrides %s\n' % (self.name, index, stype, fpath)) + else: + if not numbered: + M = Molecule(fpath) + if len(M) <= icn: + logger.error("Target %s Index %s Simulation %s : " + "initial coordinate file %s doesn't have enough structures\n" % + (self.name, index, stype, fpath)) + raise RuntimeError + logger.info('Target %s Index %s Simulation %s : ' + 'found initial 
coordinate file %s\n' % (self.name, index, stype, fpath)) + found = fpath + # if found == '': + # logger.error('Target %s Index %s Simulation %s : ' + # 'could not find initial coordinate file\n' + # 'Please provide one of the following:\n%s' + # % (self.name, index, stype, '\n'.join(paths))) + # raise RuntimeError + return found, 0 if numbered else icn + def determine_simulations(self): """ Determine which simulations need to be run. The same - simulations are run for each ensemble in the data set. + simulations are run for each index in the data set. + + Note that there may be a different number of initial + conditions (i.e. parallel runs) for different indices. """ # Determine which simulations are needed. sreqs = OrderedDict() for obsname in self.Observables: - sreqs[obsname] = self.Observables[obsname][self.Ensembles[0]].sreq - - def narrow(): - # Get the names of simulations that are REQUIRED to calculate the observables. - toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # Whoa, this is a deeply nested loop. What does it do? - # First loop over the elements in "sreqs" for each observable name. - # If the element is a string, then it's a required simulation name (top level). - # If the element is a list, then it's a list of valid simulation names - # and we need to narrow the list down. - # For the ones that are lists (and have any intersection with the top level), - # delete the ones that don't intersect. - sreq0 = copy.deepcopy(sreqs) + sreqs[obsname] = self.Observables[obsname][self.Indices[0]].sreq + + # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + + self.Simulations = OrderedDict([(i, OrderedDict()) for i in self.Indices]) + tsnames = [] + for obsname in self.Observables: + treqs = self.Observables[obsname][self.Indices[0]].treq + for treq in treqs: + if treq not in tsnames: + tsnames.append(treq) + + for index in self.Indices: + # Loop over observable names. 
Here we determine whether + # the initial coordinates are missing (bad), unique (good) or ambiguous (bad). + if 'n_ic' in self.Data.ix[index]: + n_ic = self.Data.ix[index]['n_ic'] + else: + n_ic = 1 for obsname in sreqs: - for sims in sreqs[obsname]: - if type(sims) == list: - if len(sims) == 1: - sreqs[obsname] = [sims[0]] - elif any([i in sims for i in toplevel]): - for j in sims: - if j not in toplevel: sims.remove(j) - return sreqs != sreq0 - - print sreqs - while narrow(): - print sreqs + for stypes in sreqs[obsname]: + if isinstance(stypes, str): + stypes = [stypes] + for icn in range(n_ic): + icfiles = [] + svalid = [] + for stype in stypes: + fpath, iframe = self.find_ic(index, stype, icn) + if fpath != '': + icfiles.append(fpath) + svalid.append(stype) + if len(icfiles) == 0: + logger.error('Target %s Index %s Simulation %s : ' + 'could not find initial coordinate file\n' + % (self.name, index, stype)) + raise RuntimeError + elif len(icfiles) > 1: + logger.error('Target %s Index %s Simulation %s : ' + 'ambiguous initial coordinate files (%s)' + % (self.name, index, stype, ' '.join(icfiles))) + self.Simulations[index][svalid[0]] = Simulation(index, svalid[0], icfiles[0], iframe, tsnames) + + print self.Simulations + print tsnames + # raw_input() + # if isinstance(sreqs[obsname], str): + # stypes = [sreqs[obsname]] + # for stype in stypes: + # print index, stype + + # for stype in toplevel: + # for index in self.Indices: + # def find_ic(icn): + # found = '' + # # Initial condition files will be searched for in the following priority: + # # targets/target_name/index/stype/ICs/stype_#.xyz + # # targets/target_name/index/stype/ICs/stype#.xyz + # # targets/target_name/index/stype/ICs/#.xyz + # # targets/target_name/index/stype/ICs/stype.xyz + # # targets/target_name/index/stype/ICs/coords.xyz + # # targets/target_name/index/stype/stype.xyz + # # targets/target_name/index/stype/coords.xyz + # # targets/target_name/index/stype.xyz + # # 
targets/target_name/stype.xyz + # basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), + # (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), + # (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), + # (os.path.join(index, stype, 'ICs', stype), False), + # (os.path.join(index, stype, 'ICs', 'coords'), False), + # (os.path.join(index, stype, stype), False), + # (os.path.join(index, stype, 'coords'), False), + # (os.path.join(index, stype), False), + # (os.path.join(stype), False)] + # paths = [] + # for fnm, numbered in basefnms: + # for crdsfx in self.crdsfx: + # fpath = os.path.join(self.tgtdir, fnm+crdsfx) + # paths.append(fpath) + # if os.path.exists(fpath): + # if found != '': + # logger.info('Target %s Index %s Simulation %s : ' + # '%s overrides %s\n' % (self.name, index, stype, fpath)) + # else: + # if not numbered: + # M = Molecule(fpath) + # if len(M) <= icn: + # logger.error("Target %s Index %s Simulation %s : " + # "initial coordinate file %s doesn't have enough structures\n" % + # (self.name, index, stype, fpath)) + # raise RuntimeError + # logger.info('Target %s Index %s Simulation %s : ' + # 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) + # found = fpath + # if found == '': + # logger.error('Target %s Index %s Simulation %s : ' + # 'could not find initial coordinate file\n' + # 'Please provide one of the following:\n%s' + # % (self.name, index, stype, '\n'.join(paths))) + # raise RuntimeError + # return found + # if 'n_ic' in self.Data.ix[index]: + # n_ic = self.Data.ix[index]['n_ic'] + # else: + # n_ic = 1 + # for i in range(n_ic): + # fpath = find_ic(i) + + raw_input() + + # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + # print toplevel + # raw_input() + # return + + # def narrow(): + # # Get the names of simulations that are REQUIRED to calculate the observables. 
+ # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + # # Whoa, this is a deeply nested loop. What does it do? + # # First loop over the elements in "sreqs" for each observable name. + # # If the element is a string, then it's a required simulation name (top level). + # # If the element is a list, then it's a list of valid simulation names + # # and we need to narrow the list down. + # # For the ones that are lists (and have any intersection with the top level), + # # delete the ones that don't intersect. + # sreq0 = copy.deepcopy(sreqs) + # for obsname in sreqs: + # for sims in sreqs[obsname]: + # if type(sims) == list: + # if len(sims) == 1: + # sreqs[obsname] = [sims[0]] + # elif any([i in sims for i in toplevel]): + # for j in sims: + # if j not in toplevel: sims.remove(j) + # return sreqs != sreq0 + + # print sreqs + # while narrow(): + # print sreqs # For the leftover observables where there is still some ambiguity, # we attempt # To do: Figure this out from existing initial conditions maybe - for obsname in sreqs: - for sims in sreqs[obsname]: - if type(sims) == list: - for sim in sims: - if has_ic(sim): - sreqs[obsname] = [sim] + # for obsname in sreqs: + # for sims in sreqs[obsname]: + # if type(sims) == list: + # for sim in sims: + # if has_ic(sim): + # sreqs[obsname] = [sim] - self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + # self.Simulations = OrderedDict([(i, []) for i in self.Indices]) return @@ -633,7 +810,7 @@ def prepare_simulations(self): # print narrow() # The list of simulations that we'll be running. - self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + self.Simulations = OrderedDict([(i, []) for i in self.Indices]) return @@ -1070,4 +1247,38 @@ def __str__(self): return "\n".join(msg) +class Simulation(object): + """ + Data container for a simulation (specified by index, simulation + type, initial condition). 
+ """ + + def __init__(self, index, stype, initial, iframe, tsnames): + self.index = index + self.stype = stype + self.initial = initial + self.iframe = iframe + self.timeseries = OrderedDict([(i, []) for i in tsnames]) + + def __str__(self): + msg = [] + if self.temperature is None: + msg.append("State: Unknown.") + elif self.pressure is None: + msg.append("State: Point " + str(self.idnr) + " at " + + str(self.temperature) + " K.") + else: + msg.append("State: Point " + str(self.idnr) + " at " + + str(self.temperature) + " K and " + + str(self.pressure) + " bar.") + + msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) + for key in self.ref: + msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) + + msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) + for key in self.data: + msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) + + return "\n".join(msg) diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 8bfd6281f..275265680 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -66,7 +66,7 @@ $target name LiquidBromine type Thermo_GMX weight 1.0 -expdata_txt expset.txt +source expset.txt quantities density h_vap n_sim_chain 2 md_steps 100000 diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim2.gro b/studies/004_thermo/targets/LiquidBromine/1/gas.gro similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim2.gro rename to studies/004_thermo/targets/LiquidBromine/1/gas.gro diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim2.mdp b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim2.mdp rename to studies/004_thermo/targets/LiquidBromine/1/gas.mdp diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim2.top b/studies/004_thermo/targets/LiquidBromine/1/gas.top similarity index 100% rename from 
studies/004_thermo/targets/LiquidBromine/1/sim2.top rename to studies/004_thermo/targets/LiquidBromine/1/gas.top diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim1.gro b/studies/004_thermo/targets/LiquidBromine/1/liquid.gro similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim1.gro rename to studies/004_thermo/targets/LiquidBromine/1/liquid.gro diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim1.mdp b/studies/004_thermo/targets/LiquidBromine/1/liquid.mdp similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim1.mdp rename to studies/004_thermo/targets/LiquidBromine/1/liquid.mdp diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim1.top b/studies/004_thermo/targets/LiquidBromine/1/liquid.top similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim1.top rename to studies/004_thermo/targets/LiquidBromine/1/liquid.top diff --git a/studies/004_thermo/targets/LiquidBromine/expset.txt b/studies/004_thermo/targets/LiquidBromine/expset.txt index 0bfd3a7dc..766c88b32 100644 --- a/studies/004_thermo/targets/LiquidBromine/expset.txt +++ b/studies/004_thermo/targets/LiquidBromine/expset.txt @@ -1,7 +1,7 @@ # Experimental data for liquid bromine. 
Index Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w - 0 298.15 1.01325 3102.8 1.0 29.96 1.0 + 1 298.15 1.01325 3102.8 1.0 29.96 1.0 # Variables: Denominators and weights for quantities Denoms = 30 0.3 From afd84859ad8172ff31e1e5a23e50932bab3ebd36 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 8 Apr 2014 03:30:43 -0700 Subject: [PATCH 10/25] Observable and simulation setup should be working correctly now --- src/nifty.py | 6 + src/observable.py | 151 ++++++++++---- src/parser.py | 2 +- src/thermo.py | 348 +++++++++---------------------- studies/004_thermo/single.in | 2 +- studies/004_thermo/test_parse.in | 16 +- 6 files changed, 215 insertions(+), 310 deletions(-) diff --git a/src/nifty.py b/src/nifty.py index 3fcc29d3b..b188934f9 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -237,6 +237,12 @@ def magic_string(str): #===============================# #| Math: Variable manipulation |# #===============================# +def isnan(var): + """ Attempt to see if the given variable is np.nan. """ + if isinstance(var, float): + return np.isnan(var) + return False + def isint(word): """ONLY matches integers! If you have a decimal point? None shall pass! diff --git a/src/observable.py b/src/observable.py index a1139105a..c0eea075f 100644 --- a/src/observable.py +++ b/src/observable.py @@ -88,14 +88,15 @@ class Observable(object): "treq" - that are needed to *differentiate* this observable. (Usually energy derivatives) """ - def __init__(self, source, name=None): - self.name = name if name is not None else "empty" - self.sreq = [] - self.treq = [] - self.dreq = ['energy_derivatives'] + + def __init__(self, source): + # Reference data which can be useful in calculating the observable. + self.Data = source[self.columns] + # Required time series for the gradient (defaults to energy derivatives). 
+ self.grad_requires = OrderedDict([(simulation, 'energy_derivatives') for simulation in self.requires.keys()]) def __str__(self): - return "observable is " + self.name.capitalize() + "." + return "Observable = " + self.name.capitalize() + "; Columns = " + ', '.join(self.columns) def extract(self, engines, FF, mvals, h, AGrad=True): """Calculate and extract the observable from MD results. How this is done @@ -130,17 +131,25 @@ def extract(self, engines, FF, mvals, h, AGrad=True): # class Observable_Density class Observable_Density(Observable): - def __init__(self, source, name=None): - """ Density. """ - super(Observable_Density, self).__init__(source, name) - - self.name = name if name is not None else "density" - # Calculating the density requires either a liquid or solid simulation. - self.sreq = [['liquid', 'solid']] + """ + The Observable_Density class implements common methods for + extracting the density from a simulation, but does not specify the + simulation itself ('requires' attribute). Don't create a + Density object directly, use the Liquid_Density and Solid_Density + derived classes. + + This is due to our overall framework that each observable must + have a unique list of required simulations, yet the formula for + calculating the density and its derivative is always the same. + """ - # Requires timeseries of densities from the simulation. - self.treq = ['density'] + def __init__(self, source): + # Name of the observable. + self.name = 'density' + # Columns that are taken from the data table. + self.columns = ['density'] + super(Observable_Density, self).__init__(source) def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -201,22 +210,32 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): # Analytic first derivative. 
Rho_grad = mBeta * (flat(np.mat(G) * col(Density)) / len(Density) \ - np.mean(Density) * np.mean(G, axis=1)) - return Rho_avg, Rho_err, Rho_grad +class Liquid_Density(Observable_Density): + def __init__(self, source): + # The density time series is required from the simulation. + self.requires = OrderedDict([('liquid', ['density'])]) + super(Liquid_Density, self).__init__(source) + +class Solid_Density(Observable_Density): + def __init__(self, source): + # The density time series is required from the simulation. + self.requires = OrderedDict([('solid', ['density'])]) + super(Solid_Density, self).__init__(source) + # class Observable_H_vap class Observable_H_vap(Observable): - def __init__(self, source, name=None): + def __init__(self, source): """ Enthalpy of vaporization. """ - super(Observable_H_vap, self).__init__(source, name) - - self.name = name if name is not None else "H_vap" - - # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. - self.sreq = [['liquid'], ['gas']] - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] + # Name of the observable. + self.name = 'hvap' + # Columns that are taken from the data table. + self.columns = ['hvap'] + # Get energy/volume from liquid simulation, and energy from gas simulation. + self.requires = OrderedDict([('liquid', ['energy', 'volume']), ('gas', ['energy'])]) + # Initialize the base class + super(Observable_H_vap, self).__init__(source) def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -315,23 +334,63 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Hvap_avg, Hvap_err, Hvap_grad -# class Observable_Kappa -class Observable_Kappa(Observable): - def __init__(self, source, name=None): - """ Compressibility (applies to liquid and lipid bilayer.) 
""" - super(Observable_H_vap, self).__init__(source, name) - - self.name = name if name is not None else "H_vap" - - # List of dictionaries of simulations, keyed to timeseries to extract from the simulation. - # Each dictionary represents a simulation in a sequence, but the observable isn't mapped to a unique simulation each time. - # Because of this, we determine which simulation to launch (in the sequence) based on the available initial coordinates (or explicit user input). - # Depending on which simulation is executed, we require different timeseries from the simulation, and different formulas. - # But another way is to just define two observables ... need to think about it. - - self.sreq = [{'liquid':['volume'], 'bilayer':['al']}, - ] - - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] +# class Observable_Al +class Observable_Al(Observable): + def __init__(self, source): + """ Area per lipid. """ + # Name of the observable. + self.name = 'al' + # Columns that are taken from the data table. + self.columns = ['al'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['al'])]) + # Initialize the base class + super(Observable_Al, self).__init__(source) + +# class Observable_Scd +class Observable_Scd(Observable): + def __init__(self, source): + """ Deuterium order parameter. """ + # Name of the observable. + self.name = 'scd' + # Columns that are taken from the data table. + self.columns = ['scd1_idx', 'scd1', 'scd2_idx', 'scd2'] + # Get deuterium order parameter from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['scd1', 'scd2'])]) + # Initialize the base class + super(Observable_Scd, self).__init__(source) + +# class Lipid_Kappa +class Lipid_Kappa(Observable): + def __init__(self, source): + """ Compressibility as calculated for lipid bilayers. """ + # Name of the observable. 
+ self.name = 'kappa' + # Columns that are taken from the data table. + self.columns = ['kappa'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['al'])]) + # Initialize the base class + super(Lipid_Kappa, self).__init__(source) + +# class Liquid_Kappa +class Liquid_Kappa(Observable): + def __init__(self, source): + """ Compressibility as calculated for liquids. """ + # Name of the observable. + self.name = 'kappa' + # Columns that are taken from the data table. + self.columns = ['kappa'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('liquid', ['volume'])]) + # Initialize the base class + super(Liquid_Kappa, self).__init__(source) + +## A mapping that takes us from observable names to possible Observable objects. +OMap = {'density' : [Liquid_Density, Solid_Density], + 'rho' : [Liquid_Density, Solid_Density], + 'hvap' : [Observable_H_vap], + 'h_vap' : [Observable_H_vap], + 'al' : [Observable_Al], + 'kappa' : [Liquid_Kappa, Lipid_Kappa], + 'scd' : [Observable_Scd]} diff --git a/src/parser.py b/src/parser.py index 340b45723..fb3d26e58 100644 --- a/src/parser.py +++ b/src/parser.py @@ -158,6 +158,7 @@ }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), "observables" : ([], 100, 'List of observables to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "simulations" : ([], 100, 'Simulations used to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -174,7 +175,6 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 
0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), diff --git a/src/thermo.py b/src/thermo.py index 93e50e866..4cf5661a8 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -8,10 +8,11 @@ import itertools import cStringIO -from forcebalance.observable import * +from forcebalance.molecule import Molecule +from forcebalance.observable import OMap from forcebalance.target import Target from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row, isint +from forcebalance.nifty import flat, col, row, isint, isnan from forcebalance.nifty import lp_dump, lp_load, wopen, _exec from forcebalance.nifty import LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary @@ -363,9 +364,9 @@ def __init__(self, options, tgt_opts, forcefield): # Source data (experimental data, model parameters and weights) self.set_option(tgt_opts, "source", forceprint=True) # Observables to calculate - self.set_option(tgt_opts, "observables", "observable_names", forceprint=True) + self.set_option(tgt_opts, "observables", "onames", forceprint=True) # Length of simulation chain - self.set_option(tgt_opts, "n_sim_chain", forceprint=True) + self.set_option(tgt_opts, "simulations", forceprint=True) # Number of time 
steps in the equilibration run self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run @@ -385,23 +386,20 @@ def __init__(self, options, tgt_opts, forcefield): 'tinker':['.xyz', '.arc'], 'openmm':['.pdb']}[self.engname.lower()] - ## A mapping that takes us from observable names to Observable objects. - self.Observable_Map = {'density' : Observable_Density, - 'rho' : Observable_Density, - 'hvap' : Observable_H_vap, - 'h_vap' : Observable_H_vap} - - - ## Read source data and initialize points; creates self.Data, self.Indices and self.Observables objects. + ## Read source data and initialize points; creates self.Data, self.Indices and self.Columns objects. self.read_source(os.path.join(self.root, self.tgtdir, self.source)) + + ## Set up self.Observables. + self.initialize_observables() + + ## Set up self.Simulations. + self.initialize_simulations() ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - ## Set up simulations - #self.determine_simulations() def read_source(self, srcfnm): """Read and store source data. 
@@ -421,8 +419,8 @@ def read_source(self, srcfnm): source = parse1(srcfnm) printcool_dictionary(source.metadata, title="Metadata") revhead = [] - obs = '' - obsnames = [] + col = '' + colnames = [] units = defaultdict(str) @@ -430,8 +428,8 @@ def read_source(self, srcfnm): if i == 0 and head.lower() == 'index': # Treat special case because index can also mean other things revhead.append('index') continue - newh, punit, obs = stand_head(head, obs) - if obs not in obsnames + ['temp', 'pres', 'n_ic']: obsnames.append(obs) + newh, punit, col = stand_head(head, col) + if col not in colnames + ['temp', 'pres', 'n_ic']: colnames.append(col) revhead.append(newh) if punit != '': units[newh] = punit @@ -481,7 +479,7 @@ def read_source(self, srcfnm): # (2) There can only be one file per system index / column. # (3) The column heading in the secondary file that's being # referenced must match that of the reference in the primary file. - obs2 = '' + col2 = '' for cid_, fld in enumerate(row[1:]): if ':' not in fld: continue cid = cid_ + 1 @@ -504,7 +502,7 @@ def reffld_error(reason=''): reffld_error('%s already contains a file reference' % (saveidx, revhead[cid])) subfile = parse1(fpath) fcol = fcol_ - 1 - head2, punit2, obs2 = stand_head(subfile.heading[fcol], obs2) + head2, punit2, col2 = stand_head(subfile.heading[fcol], col2) if revhead[cid] != head2: reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] @@ -533,6 +531,18 @@ def reffld_error(reason=''): # Turn it into a pandas DataFrame. 
self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) + def intcol(col): + if col in self.Data.columns: + for idx in self.Data.index: + if not isnan(self.Data[col][idx]): + self.Data[col][idx] = int(self.Data[col][idx]) + + def floatcol(col): + if col in self.Data.columns: + self.Data[col] = self.Data[col].astype(float) + + intcol('n_ic') + # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. self.Indices = [] @@ -540,42 +550,9 @@ def reffld_error(reason=''): if idx[0] not in self.Indices: self.Indices.append(idx[0]) - # A list of Observable objects (i.e. column headings) which - # contain methods for calculating observables that we need. - # Think about: - # (1) How much variability is allowed across Indices? - # For instance, different S_cd is permissible. - self.Observables = OrderedDict() - for obsname in [stand_head(i, '')[2] for i in self.observable_names]: - if obsname in self.Observables: - logger.error('%s was already specified as an observable' % (obsname)) - self.Observables[obsname] = OrderedDict() - for ie, index in enumerate(self.Indices): - if obsname in self.Observable_Map: - newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) - logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - self.Observables[obsname][index] = newobs - else: - logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - self.Observables[obsname][index] = Observable(name=obsname, source=self.Data.ix[index]) - - print self.Indices - print self.Observables - print repr(self.Data) - raw_input() - # for index in self.Indices: - # self.Observables[index] = [] - # for obsname in obsnames: - # for index, ie in enumerate(self.Indices): - # if obsname in self.Observable_Map: - # newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) 
- # if newobs.name in [obs.name for obs in self.Observables[index]]: - # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) - # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - # self.Observables[index].append(newobs) - # else: - # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - # self.Observables[index].append(Observable(name=obsname, source=self.Data.ix[index])) + # List of columns in the main data table. + self.Columns = self.Data.columns + return def find_ic(self, index, stype, icn): @@ -623,195 +600,75 @@ def find_ic(self, index, stype, icn): logger.info('Target %s Index %s Simulation %s : ' 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError + + if found == '': + logger.error('Could not find initial coordinate file for simulation type %s' % stype) + raise RuntimeError + return found, 0 if numbered else icn - def determine_simulations(self): - + def initialize_observables(self): """ - Determine which simulations need to be run. The same - simulations are run for each index in the data set. + Determine Observable objects to be created. Checks to see + whether simulations are consistent with observables (i.e. no + missing simulations or ambiguities.) + In order to implement a new observable, create a class in + observable.py and add it to OMap. 
+ """ + self.Observables = OrderedDict() + for oname in [stand_head(i, '')[2] for i in self.onames]: + if oname in self.Observables: + logger.error('%s was already specified as an observable' % (oname)) + raise RuntimeError + self.Observables[oname] = OrderedDict() + for index in self.Indices: + if oname in OMap: + Objs = [] + Reqs = [] + for OClass in OMap[oname]: + OObj = OClass(self.Data) + Reqs.append(OObj.requires.keys()) + if all([i in self.simulations for i in OObj.requires.keys()]): + Objs.append(OObj) + if len(Objs) == 0: + logger.error('Observable %s is specified but required simulations are missing; choose %s' % (oname, ' or '.join([str(r) for r in Reqs]))) + raise RuntimeError + if len(Objs) > 1: + logger.error("Observable %s not uniquely mapped to simulations (choose between %s)" % (oname, ' or '.join([o.name in Objs]))) + raise RuntimeError + logger.info("Creating %s observable object for index %s\n" % (Objs[0].name, index)) + self.Observables[oname][index] = Objs[0] + else: + logger.error('%s is specified but there is no corresponding Observable class\n' % oname) + raise RuntimeError + return + + def initialize_simulations(self): + """ + Determine simulations to be run. The same simulations are + run for each index in the data set. + Note that there may be a different number of initial conditions (i.e. parallel runs) for different indices. """ - # Determine which simulations are needed. - sreqs = OrderedDict() - for obsname in self.Observables: - sreqs[obsname] = self.Observables[obsname][self.Indices[0]].sreq - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - self.Simulations = OrderedDict([(i, OrderedDict()) for i in self.Indices]) - tsnames = [] - for obsname in self.Observables: - treqs = self.Observables[obsname][self.Indices[0]].treq - for treq in treqs: - if treq not in tsnames: - tsnames.append(treq) - for index in self.Indices: - # Loop over observable names. 
Here we determine whether - # the initial coordinates are missing (bad), unique (good) or ambiguous (bad). if 'n_ic' in self.Data.ix[index]: - n_ic = self.Data.ix[index]['n_ic'] + nics = [i for i in self.Data.ix[index]['n_ic'] if not isnan(i)] + if len(nics) != 1: + logger.error("Expected 1 number for n_ic but got %i" % len(nics)) + raise RuntimeError + n_ic = nics[0] else: n_ic = 1 - for obsname in sreqs: - for stypes in sreqs[obsname]: - if isinstance(stypes, str): - stypes = [stypes] - for icn in range(n_ic): - icfiles = [] - svalid = [] - for stype in stypes: - fpath, iframe = self.find_ic(index, stype, icn) - if fpath != '': - icfiles.append(fpath) - svalid.append(stype) - if len(icfiles) == 0: - logger.error('Target %s Index %s Simulation %s : ' - 'could not find initial coordinate file\n' - % (self.name, index, stype)) - raise RuntimeError - elif len(icfiles) > 1: - logger.error('Target %s Index %s Simulation %s : ' - 'ambiguous initial coordinate files (%s)' - % (self.name, index, stype, ' '.join(icfiles))) - self.Simulations[index][svalid[0]] = Simulation(index, svalid[0], icfiles[0], iframe, tsnames) - - print self.Simulations - print tsnames - # raw_input() - # if isinstance(sreqs[obsname], str): - # stypes = [sreqs[obsname]] - # for stype in stypes: - # print index, stype - - # for stype in toplevel: - # for index in self.Indices: - # def find_ic(icn): - # found = '' - # # Initial condition files will be searched for in the following priority: - # # targets/target_name/index/stype/ICs/stype_#.xyz - # # targets/target_name/index/stype/ICs/stype#.xyz - # # targets/target_name/index/stype/ICs/#.xyz - # # targets/target_name/index/stype/ICs/stype.xyz - # # targets/target_name/index/stype/ICs/coords.xyz - # # targets/target_name/index/stype/stype.xyz - # # targets/target_name/index/stype/coords.xyz - # # targets/target_name/index/stype.xyz - # # targets/target_name/stype.xyz - # basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), - 
# (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', stype), False), - # (os.path.join(index, stype, 'ICs', 'coords'), False), - # (os.path.join(index, stype, stype), False), - # (os.path.join(index, stype, 'coords'), False), - # (os.path.join(index, stype), False), - # (os.path.join(stype), False)] - # paths = [] - # for fnm, numbered in basefnms: - # for crdsfx in self.crdsfx: - # fpath = os.path.join(self.tgtdir, fnm+crdsfx) - # paths.append(fpath) - # if os.path.exists(fpath): - # if found != '': - # logger.info('Target %s Index %s Simulation %s : ' - # '%s overrides %s\n' % (self.name, index, stype, fpath)) - # else: - # if not numbered: - # M = Molecule(fpath) - # if len(M) <= icn: - # logger.error("Target %s Index %s Simulation %s : " - # "initial coordinate file %s doesn't have enough structures\n" % - # (self.name, index, stype, fpath)) - # raise RuntimeError - # logger.info('Target %s Index %s Simulation %s : ' - # 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) - # found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError - # return found - # if 'n_ic' in self.Data.ix[index]: - # n_ic = self.Data.ix[index]['n_ic'] - # else: - # n_ic = 1 - # for i in range(n_ic): - # fpath = find_ic(i) - - raw_input() - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # print toplevel - # raw_input() - # return - - # def narrow(): - # # Get the names of simulations that are REQUIRED to calculate the observables. - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # # Whoa, this is a deeply nested loop. What does it do? 
- # # First loop over the elements in "sreqs" for each observable name. - # # If the element is a string, then it's a required simulation name (top level). - # # If the element is a list, then it's a list of valid simulation names - # # and we need to narrow the list down. - # # For the ones that are lists (and have any intersection with the top level), - # # delete the ones that don't intersect. - # sreq0 = copy.deepcopy(sreqs) - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # if len(sims) == 1: - # sreqs[obsname] = [sims[0]] - # elif any([i in sims for i in toplevel]): - # for j in sims: - # if j not in toplevel: sims.remove(j) - # return sreqs != sreq0 - - # print sreqs - # while narrow(): - # print sreqs - # For the leftover observables where there is still some ambiguity, - # we attempt - # To do: Figure this out from existing initial conditions maybe - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # for sim in sims: - # if has_ic(sim): - # sreqs[obsname] = [sim] - - - # self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - - return - - def prepare_simulations(self): - - """ - - Prepare simulations to be launched. Set initial conditions - and create directories. This function is intended to be run - at the start of each optimization cycle, so that initial - conditions may be easily set. - - """ - # print narrow() - - # The list of simulations that we'll be running. - self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - + for s in self.simulations: + for icn in range(n_ic): + fpath, iframe = self.find_ic(index, s, icn) + self.Simulations[index][s] = Simulation(index, s, fpath, iframe) + print index, s, str(self.Simulations[index][s]) + # print self.Simulations return def launch_simulation(self, index, simname): @@ -1254,31 +1111,14 @@ class Simulation(object): type, initial condition). 
""" - def __init__(self, index, stype, initial, iframe, tsnames): + def __init__(self, index, stype, initial, iframe): self.index = index - self.stype = stype + self.type = stype self.initial = initial self.iframe = iframe - self.timeseries = OrderedDict([(i, []) for i in tsnames]) def __str__(self): msg = [] - if self.temperature is None: - msg.append("State: Unknown.") - elif self.pressure is None: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K.") - else: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K and " + - str(self.pressure) + " bar.") - - msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) - for key in self.ref: - msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) - - msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) - for key in self.data: - msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) - - return "\n".join(msg) + msg.append("Simulation: Index %s Type %s" % (self.index, self.type)) + msg.append("Initial conditions: File %s Frame %i" % (self.initial, self.iframe)) + return '\n'.join(msg) diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 275265680..9fb288210 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -68,7 +68,7 @@ type Thermo_GMX weight 1.0 source expset.txt quantities density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 $end diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in index 4eb82ab64..89edb69a7 100644 --- a/studies/004_thermo/test_parse.in +++ b/studies/004_thermo/test_parse.in @@ -72,7 +72,7 @@ type Thermo_GMX weight 1.0 source expset.txt observables density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 $end @@ -83,7 +83,7 @@ type Thermo_GMX weight 1.0 source data.csv observables density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 
100000 eq_steps 50000 $end @@ -94,7 +94,7 @@ type Thermo_GMX weight 1.0 source data.tab.txt observables density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 $end @@ -105,7 +105,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -116,7 +116,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -127,7 +127,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -138,7 +138,7 @@ type Thermo_GMX weight 1.0 source lipidcol2a.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -149,7 +149,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end From efdb8427ba222391ed828384e297d232c3620754 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 8 Apr 2014 03:32:39 -0700 Subject: [PATCH 11/25] Clean up --- src/thermo.py | 75 --------------------------------------------------- 1 file changed, 75 deletions(-) diff --git a/src/thermo.py b/src/thermo.py index 4cf5661a8..9d57010ae 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -697,81 +697,6 @@ def launch_simulation(self, index, simname): input_files = self.nptfiles + self.scripts + ['forcebalance.p'], output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) - # NAMES FOR OBJECTS! - - # Timeseries: Time series of an instantaneous observable that is - # returned by the MD simulation. - - # Observable: A thermodynamic property which can be compared to - # experiment and possesses methods for calculating the property - # and its derivatives. - - # State? Point? What should this be called?? 
- - # # print revhead[1:] - # # for rn, row in enumerate(drows): - # # print index[rn], row - - # # print repr(self.Data) - - # # # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) - - - # # # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) - # # # print self.Data.__repr__ - # # # raw_input() - - # # return - - # fp = open(expdata) - - # line = fp.readline() - # foundHeader = False - # names = None - # units = None - # label_header = None - # label_unit = None - # count = 0 - # metadata = {} - # while line: - # # Skip comments and blank lines - # if line.lstrip().startswith("#") or not line.strip(): - # line = fp.readline() - # continue - # # Metadata is denoted using - # if "=" in line: # Read variable - # param, value = line.split("=") - # param = param.strip().lower() - # metadata[param] = value - # # if param == "denoms": - # # for e, v in enumerate(value.split()): - # # self.denoms[self.observables[e]] = float(v) - # # elif param == "weights": - # # for e, v in enumerate(value.split()): - # # self.weights[self.observables[e]] = float(v) - # elif foundHeader: # Read exp data - # count += 1 - # vals = line.split() - # label = (vals[0], label_header, label_unit) - # refs = np.array(vals[1:-2:2]).astype(float) - # wts = np.array(vals[2:-2:2]).astype(float) - # temperature = float(vals[-2]) - # pressure = None if vals[-1].lower() == "none" else \ - # float(vals[-1]) - # dp = Point(count, label=label, refs=refs, weights=wts, - # names=names, units=units, - # temperature=temperature, pressure=pressure) - # self.points.append(dp) - # else: # Read headers - # foundHeader = True - # headers = zip(*[tuple(h.split("_")) for h in line.split() - # if h != "w"]) - # label_header = list(headers[0])[0] - # label_unit = list(headers[1])[0] - # names = list(headers[0][1:-2]) - # units = list(headers[1][1:-2]) - # line = fp.readline() - def 
retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated observables in the Point object dp. From 132e70a7ead9ae5fba414d6a750b73eb012f589e Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Wed, 16 Apr 2014 14:35:08 -0700 Subject: [PATCH 12/25] Target now knows which simulations to launch. Next task: Pass in auxiliary files (.top, .mdp) and run the simulation. --- src/forcefield.py | 30 +- src/gmxio.py | 5 +- src/nifty.py | 12 + src/observable.py | 33 +- src/parser.py | 3 +- src/thermo.py | 469 ++++++------------ studies/004_thermo/single.in | 12 +- .../targets/Lipid_TAB/lipidcol1.txt | 4 +- 8 files changed, 219 insertions(+), 349 deletions(-) diff --git a/src/forcefield.py b/src/forcefield.py index 7db442a39..9aef243ff 100644 --- a/src/forcefield.py +++ b/src/forcefield.py @@ -193,7 +193,7 @@ class FF(forcebalance.BaseClass): For details on force field parsing, see the detailed documentation for addff. """ - def __init__(self, options, verbose=True): + def __init__(self, options, verbose=True, printopt=True): """Instantiation of force field class. @@ -227,6 +227,8 @@ def __init__(self, options, verbose=True): self.set_option(options, 'rigid_water') ## Bypass the transformation and use physical parameters directly self.set_option(options, 'use_pvals') + ## Allow duplicate parameter names (internally construct unique names) + self.set_option(options, 'duplicate_pnames') #======================================# # Variables which are set here # @@ -318,7 +320,12 @@ def __init__(self, options, verbose=True): self.linedestroy_this = [] self.prmdestroy_this = [] ## Print the optimizer options. 
- printcool_dictionary(self.PrintOptionDict, title="Setup for force field") + if printopt: printcool_dictionary(self.PrintOptionDict, title="Setup for force field") + + @classmethod + def fromfile(cls, fnm): + options = {'forcefield' : [fnm], 'ffdir' : '.', 'duplicate_pnames' : True} + return cls(options, verbose=False, printopt=False) def addff(self,ffname): """ Parse a force field file and add it to the class. @@ -496,15 +503,32 @@ def addff_txt(self, ffname, fftype): # For each of the fields that are to be parameterized (indicated by PRM #), # assign a parameter type to it according to the Interaction Type -> Parameter Dictionary. pid = self.Readers[ffname].build_pid(pfld) + pid_ = pid # Add pid into the dictionary. # LPW: Here is a hack to allow duplicate parameter IDs. if pid in self.map: pid0 = pid extranum = 0 + dupfnms = [os.path.basename(i[0]) for i in self.pfields[self.map[pid]]] + duplns = [i[1] for i in self.pfields[self.map[pid]]] + dupflds = [i[2] for i in self.pfields[self.map[pid]]] while pid in self.map: pid = "%s%i" % (pid0, extranum) extranum += 1 - logger.info("Encountered an duplicate parameter ID: parameter name has been changed to %s\n" % pid) + def warn_or_err(*args): + if self.duplicate_pnames: + logger.warn(*args) + else: + logger.error(*args) + warn_or_err("Encountered an duplicate parameter ID (%s)\n" % pid_) + warn_or_err("file %s line %i field %i duplicates:\n" + % (os.path.basename(ffname), ln+1, pfld)) + for dupfnm, dupln, dupfld in zip(dupfnms, duplns, dupflds): + warn_or_err("file %s line %i field %i\n" % (dupfnm, dupln+1, dupfld)) + if self.duplicate_pnames: + logger.warn("Parameter name has been changed to %s\n" % pid) + else: + raise RuntimeError self.map[pid] = self.np # This parameter ID has these atoms involved. 
self.patoms.append([self.Readers[ffname].molatom]) diff --git a/src/gmxio.py b/src/gmxio.py index 273438ada..bcd0525a8 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1491,12 +1491,15 @@ def __init__(self,options,tgt_opts,forcefield): self.set_option(options,'gmxpath') # Suffix for GROMACS executables. self.set_option(options,'gmxsuffix') + # Engine for calculating things locally (e.g. polarization correction) self.engine_ = GMX # Name of the engine to pass to scripts. self.engname = "gromacs" + # Valid coordinate suffix. + self.crdsfx = ['.gro', '.pdb'] # Command prefix. self.mdpfx = "bash gmxprefix.bash" # Scripts to be copied from the ForceBalance installation directory. - self.scripts = ['gmxprefix.bash', 'md_chain.py'] + self.scripts = ['gmxprefix.bash', 'md_one.py'] ## Initialize base class. super(Thermo_GMX,self).__init__(options,tgt_opts,forcefield) diff --git a/src/nifty.py b/src/nifty.py index 3fcc29d3b..01f005c1d 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -279,6 +279,18 @@ def floatornan(word): logger.info("Setting %s to % .1e\n" % big) return big +def isnpnan(var): + """ + + Determine whether a variable is np.nan. I wrote this function + because np.isnan would crash if we use it on a dtype that is not + np.float + + """ + if type(var) in [np.float, np.float32, np.float64, np.double]: + return np.isnan(var) + return False + def col(vec): """ Given any list, array, or matrix, return a 1-column matrix. 
diff --git a/src/observable.py b/src/observable.py index a1139105a..f0fa790da 100644 --- a/src/observable.py +++ b/src/observable.py @@ -91,7 +91,6 @@ class Observable(object): def __init__(self, source, name=None): self.name = name if name is not None else "empty" self.sreq = [] - self.treq = [] self.dreq = ['energy_derivatives'] def __str__(self): @@ -135,12 +134,7 @@ def __init__(self, source, name=None): super(Observable_Density, self).__init__(source, name) self.name = name if name is not None else "density" - - # Calculating the density requires either a liquid or solid simulation. - self.sreq = [['liquid', 'solid']] - - # Requires timeseries of densities from the simulation. - self.treq = ['density'] + self.sreq = [{'liquid':['density'], 'solid':['density']}] def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -213,10 +207,8 @@ def __init__(self, source, name=None): self.name = name if name is not None else "H_vap" # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. - self.sreq = [['liquid'], ['gas']] - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] + self.sreq = [{'liquid':['energy', 'volume']}, + {'gas':['energy']}] def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -320,18 +312,11 @@ class Observable_Kappa(Observable): def __init__(self, source, name=None): """ Compressibility (applies to liquid and lipid bilayer.) """ super(Observable_H_vap, self).__init__(source, name) - self.name = name if name is not None else "H_vap" + # List of dictionaries of simulation names : timeseries that + # we extract from the simulation. - # List of dictionaries of simulations, keyed to timeseries to extract from the simulation. - # Each dictionary represents a simulation in a sequence, but the observable isn't mapped to a unique simulation each time. 
- # Because of this, we determine which simulation to launch (in the sequence) based on the available initial coordinates (or explicit user input). - # Depending on which simulation is executed, we require different timeseries from the simulation, and different formulas. - # But another way is to just define two observables ... need to think about it. - - self.sreq = [{'liquid':['volume'], 'bilayer':['al']}, - ] - - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] + # Each dictionary represents a simulation needed to calculate + # the observable, but the required timeseries depends on the + # simulation that we run. + self.sreq = [{'liquid':['volume'], 'bilayer':['al']}], diff --git a/src/parser.py b/src/parser.py index 340b45723..b21a39214 100644 --- a/src/parser.py +++ b/src/parser.py @@ -102,6 +102,7 @@ "asynchronous" : (0, 0, 'Execute Work Queue tasks and local calculations asynchronously for improved speed', 'Targets that use Work Queue (advanced usage)'), "reevaluate" : (None, 0, 'Re-evaluate the objective function and gradients when the step is rejected (for noisy objective functions).', 'Main Optimizer'), "continue" : (0, 140, 'Continue the current run from where we left off (supports mid-iteration recovery).', 'Main Optimizer'), + "duplicate_pnames" : (0, -150, 'Allow duplicate parameter names (only if you know what you are doing!', 'Force Field Parser'), }, 'floats' : {"trust0" : (1e-1, 100, 'Levenberg-Marquardt trust radius; set to negative for nonlinear search', 'Main Optimizer'), "mintrust" : (0.0, 10, 'Minimum trust radius (if the trust radius is tiny, then noisy optimizations become really gnarly)', 'Main Optimizer'), @@ -158,6 +159,7 @@ }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), "observables" : ([], 100, 'List of observables to be fitted, each 
must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "simulations" : ([], 100, 'List of simulations to be run (in order to calculate fitting observables)', 'Thermodynamic properties target', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -174,7 +176,6 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), diff --git a/src/thermo.py b/src/thermo.py index 93e50e866..627b5a7e2 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -11,10 +11,11 @@ from forcebalance.observable import * from forcebalance.target import Target from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row, isint +from forcebalance.nifty import flat, col, row, isint, isnpnan from forcebalance.nifty import lp_dump, lp_load, wopen, _exec -from forcebalance.nifty import LinkFile, link_dir_contents +from forcebalance.nifty import GoInto, LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary +from forcebalance.nifty 
import getWorkQueue from collections import defaultdict, OrderedDict @@ -363,9 +364,9 @@ def __init__(self, options, tgt_opts, forcefield): # Source data (experimental data, model parameters and weights) self.set_option(tgt_opts, "source", forceprint=True) # Observables to calculate - self.set_option(tgt_opts, "observables", "observable_names", forceprint=True) + self.set_option(tgt_opts, "observables", "user_observable_names", forceprint=True) # Length of simulation chain - self.set_option(tgt_opts, "n_sim_chain", forceprint=True) + self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run @@ -380,10 +381,6 @@ def __init__(self, options, tgt_opts, forcefield): self.denoms = {} # Weights for observables self.weights = {} - # Suffixes for coordinate files - self.crdsfx = {'gromacs':['.gro', '.pdb'], - 'tinker':['.xyz', '.arc'], - 'openmm':['.pdb']}[self.engname.lower()] ## A mapping that takes us from observable names to Observable objects. self.Observable_Map = {'density' : Observable_Density, @@ -401,7 +398,7 @@ def __init__(self, options, tgt_opts, forcefield): os.path.join(self.root, self.tempdir, f)) ## Set up simulations - #self.determine_simulations() + self.prepare_simulations() def read_source(self, srcfnm): """Read and store source data. @@ -533,6 +530,9 @@ def reffld_error(reason=''): # Turn it into a pandas DataFrame. self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) + # self.collapse = + # self. + # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. self.Indices = [] @@ -540,17 +540,36 @@ def reffld_error(reason=''): if idx[0] not in self.Indices: self.Indices.append(idx[0]) + # Certain things (e.g. 
run parameters like temp, pres) are keyed to the index only. + chead = [] + crows = [] + for index in self.Indices: + crow = [] + for head in ['temp', 'pres']: + if head not in self.Data: continue + if head not in chead: chead.append(head) + rlist = list(set([i for i in self.Data.ix[index][head][:] if not isnpnan(i)])) + if len(rlist) != 1: + logger.error('Heading %s should appear once for index %s (found %i)' % (head, index, len(rlist))) + raise RuntimeError + crow.append(rlist[0]) + crows.append(crow[:]) + + # Now create the mini data table. + self.Data2 = pd.DataFrame(crows, columns=chead, index=self.Indices) + # A list of Observable objects (i.e. column headings) which # contain methods for calculating observables that we need. # Think about: # (1) How much variability is allowed across Indices? # For instance, different S_cd is permissible. self.Observables = OrderedDict() - for obsname in [stand_head(i, '')[2] for i in self.observable_names]: + self.ObsNames = [] + for obsname in [stand_head(i, '')[2] for i in self.user_observable_names]: if obsname in self.Observables: logger.error('%s was already specified as an observable' % (obsname)) self.Observables[obsname] = OrderedDict() - for ie, index in enumerate(self.Indices): + for index in self.Indices: if obsname in self.Observable_Map: newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) @@ -558,24 +577,16 @@ def reffld_error(reason=''): else: logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) self.Observables[obsname][index] = Observable(name=obsname, source=self.Data.ix[index]) + self.ObsNames.append(obsname) print self.Indices print self.Observables print repr(self.Data) + # if 'temp' in self.Data: + # self.Temperatures = OrderedDict() + # for index in self.Indices: + # print self.Data['temp'][0] raw_input() - # for index in 
self.Indices: - # self.Observables[index] = [] - # for obsname in obsnames: - # for index, ie in enumerate(self.Indices): - # if obsname in self.Observable_Map: - # newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) - # if newobs.name in [obs.name for obs in self.Observables[index]]: - # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) - # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - # self.Observables[index].append(newobs) - # else: - # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - # self.Observables[index].append(Observable(name=obsname, source=self.Data.ix[index])) return def find_ic(self, index, stype, icn): @@ -623,180 +634,8 @@ def find_ic(self, index, stype, icn): logger.info('Target %s Index %s Simulation %s : ' 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError return found, 0 if numbered else icn - def determine_simulations(self): - - """ - Determine which simulations need to be run. The same - simulations are run for each index in the data set. - - Note that there may be a different number of initial - conditions (i.e. parallel runs) for different indices. - """ - - # Determine which simulations are needed. 
- sreqs = OrderedDict() - for obsname in self.Observables: - sreqs[obsname] = self.Observables[obsname][self.Indices[0]].sreq - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - - self.Simulations = OrderedDict([(i, OrderedDict()) for i in self.Indices]) - tsnames = [] - for obsname in self.Observables: - treqs = self.Observables[obsname][self.Indices[0]].treq - for treq in treqs: - if treq not in tsnames: - tsnames.append(treq) - - for index in self.Indices: - # Loop over observable names. Here we determine whether - # the initial coordinates are missing (bad), unique (good) or ambiguous (bad). - if 'n_ic' in self.Data.ix[index]: - n_ic = self.Data.ix[index]['n_ic'] - else: - n_ic = 1 - for obsname in sreqs: - for stypes in sreqs[obsname]: - if isinstance(stypes, str): - stypes = [stypes] - for icn in range(n_ic): - icfiles = [] - svalid = [] - for stype in stypes: - fpath, iframe = self.find_ic(index, stype, icn) - if fpath != '': - icfiles.append(fpath) - svalid.append(stype) - if len(icfiles) == 0: - logger.error('Target %s Index %s Simulation %s : ' - 'could not find initial coordinate file\n' - % (self.name, index, stype)) - raise RuntimeError - elif len(icfiles) > 1: - logger.error('Target %s Index %s Simulation %s : ' - 'ambiguous initial coordinate files (%s)' - % (self.name, index, stype, ' '.join(icfiles))) - self.Simulations[index][svalid[0]] = Simulation(index, svalid[0], icfiles[0], iframe, tsnames) - - print self.Simulations - print tsnames - # raw_input() - # if isinstance(sreqs[obsname], str): - # stypes = [sreqs[obsname]] - # for stype in stypes: - # print index, stype - - # for stype in toplevel: - # for index in self.Indices: - # def find_ic(icn): - # found = '' - # # Initial condition files will be searched for in the following priority: - # # targets/target_name/index/stype/ICs/stype_#.xyz - # # targets/target_name/index/stype/ICs/stype#.xyz - # # targets/target_name/index/stype/ICs/#.xyz - # # 
targets/target_name/index/stype/ICs/stype.xyz - # # targets/target_name/index/stype/ICs/coords.xyz - # # targets/target_name/index/stype/stype.xyz - # # targets/target_name/index/stype/coords.xyz - # # targets/target_name/index/stype.xyz - # # targets/target_name/stype.xyz - # basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', stype), False), - # (os.path.join(index, stype, 'ICs', 'coords'), False), - # (os.path.join(index, stype, stype), False), - # (os.path.join(index, stype, 'coords'), False), - # (os.path.join(index, stype), False), - # (os.path.join(stype), False)] - # paths = [] - # for fnm, numbered in basefnms: - # for crdsfx in self.crdsfx: - # fpath = os.path.join(self.tgtdir, fnm+crdsfx) - # paths.append(fpath) - # if os.path.exists(fpath): - # if found != '': - # logger.info('Target %s Index %s Simulation %s : ' - # '%s overrides %s\n' % (self.name, index, stype, fpath)) - # else: - # if not numbered: - # M = Molecule(fpath) - # if len(M) <= icn: - # logger.error("Target %s Index %s Simulation %s : " - # "initial coordinate file %s doesn't have enough structures\n" % - # (self.name, index, stype, fpath)) - # raise RuntimeError - # logger.info('Target %s Index %s Simulation %s : ' - # 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) - # found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError - # return found - # if 'n_ic' in self.Data.ix[index]: - # n_ic = self.Data.ix[index]['n_ic'] - # else: - # n_ic = 1 - # for i in range(n_ic): - # fpath = find_ic(i) - - raw_input() - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) 
== str] for i in sreqs])) - # print toplevel - # raw_input() - # return - - # def narrow(): - # # Get the names of simulations that are REQUIRED to calculate the observables. - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # # Whoa, this is a deeply nested loop. What does it do? - # # First loop over the elements in "sreqs" for each observable name. - # # If the element is a string, then it's a required simulation name (top level). - # # If the element is a list, then it's a list of valid simulation names - # # and we need to narrow the list down. - # # For the ones that are lists (and have any intersection with the top level), - # # delete the ones that don't intersect. - # sreq0 = copy.deepcopy(sreqs) - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # if len(sims) == 1: - # sreqs[obsname] = [sims[0]] - # elif any([i in sims for i in toplevel]): - # for j in sims: - # if j not in toplevel: sims.remove(j) - # return sreqs != sreq0 - - # print sreqs - # while narrow(): - # print sreqs - # For the leftover observables where there is still some ambiguity, - # we attempt - # To do: Figure this out from existing initial conditions maybe - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # for sim in sims: - # if has_ic(sim): - # sreqs[obsname] = [sim] - - - # self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - - return - def prepare_simulations(self): """ @@ -810,110 +649,51 @@ def prepare_simulations(self): # print narrow() # The list of simulations that we'll be running. + self.SimNames = [i.lower() for i in self.user_simulation_names] self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - - return - - def launch_simulation(self, index, simname): - - """ - - Launch a simulation - either locally or via the Work Queue. 
- This function is intended to be run within the folder: - target_name/iteration_number/system_index/simulation_name/initial_condition OR - target_name/iteration_number/system_index/simulation_name - - """ - - wq = getWorkQueue() - if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): - link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) - self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] - self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) - cmdstr = '%s python npt.py %s %.3f %.3f' % (self.nptpfx, self.engname, temperature, pressure) - if wq == None: - logger.info("Running condensed phase simulation locally.\n") - logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - _exec(cmdstr, copy_stderr=True, outfnm='npt.out') - else: - queue_up(wq, command = cmdstr+' &> npt.out', - input_files = self.nptfiles + self.scripts + ['forcebalance.p'], - output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) - - # NAMES FOR OBJECTS! - - # Timeseries: Time series of an instantaneous observable that is - # returned by the MD simulation. - - # Observable: A thermodynamic property which can be compared to - # experiment and possesses methods for calculating the property - # and its derivatives. - - # State? Point? What should this be called?? - - # # print revhead[1:] - # # for rn, row in enumerate(drows): - # # print index[rn], row - - # # print repr(self.Data) - - # # # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) - - - # # # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) - # # # print self.Data.__repr__ - # # # raw_input() - - # # return + # Dictionary of time series to extract from each simulation. 
+ SimTS = defaultdict(set) + # Check to see whether each observable can be unambiguously calculated from the specified simulations + for obsname in self.ObsNames: + sreq = self.Observables[obsname][self.Indices[0]].sreq + ssels = [] + SimTS_ = defaultdict(set) + for sdct in sreq: + # This is a dictionary of simulation names : + if len(set(self.SimNames).intersection(sdct.keys())) > 1: + logger.error("Ambiguous: Don't know which simulation to use in calculating observable %s" % obsname) + logger.error("Choose ONE from this menu of required simulations: [%s]" % ' '.join(sdct.keys())) + raise RuntimeError + if len(set(self.SimNames).intersection(sdct.keys())) == 0: + logger.error("Missing Simulation: Cannot calculate observable %s" % obsname) + logger.error("Choose ONE from this menu of required simulations: [%s]" % ' '.join(sdct.keys())) + raise RuntimeError + # One of the simulations that will be used in calculating this observable. + ssel = list(set(self.SimNames).intersection(sdct.keys()))[0] + SimTS[ssel].update(set(sdct[ssel])) + SimTS_[ssel].update(set(sdct[ssel])) + printcool_dictionary({i:' '.join(sorted(list(SimTS_[i]))) for i in sorted(SimTS_.keys())}, title="Observable %s uses these simulations : timeseries" % obsname) + printcool_dictionary({i:' '.join(sorted(list(SimTS[i]))) for i in sorted(SimTS.keys())}, title="Needed Simulations : Extracted Timeseries") + unused = sorted(list(set(self.SimNames).difference(set(SimTS.keys())))) + if len(unused) > 0: + logger.error("Simulation %s is specified but it's never used to calculate any observables" % ', '.join(unused)) + raise RuntimeError - # fp = open(expdata) - - # line = fp.readline() - # foundHeader = False - # names = None - # units = None - # label_header = None - # label_unit = None - # count = 0 - # metadata = {} - # while line: - # # Skip comments and blank lines - # if line.lstrip().startswith("#") or not line.strip(): - # line = fp.readline() - # continue - # # Metadata is denoted using - # if "=" 
in line: # Read variable - # param, value = line.split("=") - # param = param.strip().lower() - # metadata[param] = value - # # if param == "denoms": - # # for e, v in enumerate(value.split()): - # # self.denoms[self.observables[e]] = float(v) - # # elif param == "weights": - # # for e, v in enumerate(value.split()): - # # self.weights[self.observables[e]] = float(v) - # elif foundHeader: # Read exp data - # count += 1 - # vals = line.split() - # label = (vals[0], label_header, label_unit) - # refs = np.array(vals[1:-2:2]).astype(float) - # wts = np.array(vals[2:-2:2]).astype(float) - # temperature = float(vals[-2]) - # pressure = None if vals[-1].lower() == "none" else \ - # float(vals[-1]) - # dp = Point(count, label=label, refs=refs, weights=wts, - # names=names, units=units, - # temperature=temperature, pressure=pressure) - # self.points.append(dp) - # else: # Read headers - # foundHeader = True - # headers = zip(*[tuple(h.split("_")) for h in line.split() - # if h != "w"]) - # label_header = list(headers[0])[0] - # label_unit = list(headers[1])[0] - # names = list(headers[0][1:-2]) - # units = list(headers[1][1:-2]) - # line = fp.readline() + for index in self.Indices: + for stype, tsset in SimTS.items(): + if 'n_ic' in self.Data.ix[index]: + n_ic = self.Data.ix[index]['n_ic'] + if n_ic < 1: + logger.error("n_ic must >= 1") + raise RuntimeError + else: + n_ic = 1 + for icn in range(n_ic): + icfnm, icframe = self.find_ic(index, stype, icn) + sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype + self.Simulations[index].append(Simulation(sname, index, stype, icfnm, icframe, sorted(list(tsset)))) + + return def retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated @@ -950,7 +730,7 @@ def retrieve(self, dp): def submit_jobs(self, mvals, AGrad=True, AHess=True): """This routine is called by Objective.stage() and will run before "get". 
- It submits the jobs and the stage() function will wait for jobs + It submits the jobs (or runs them locally) and the stage() function will wait for jobs to complete. Parameters @@ -967,6 +747,49 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): Nothing. """ + + printcool("Submitting jobs") + cwd = os.getcwd() + wq = getWorkQueue() + for index in self.Indices: + # if 'temp' in self.Data: + # tset = set([iself.Data['temp'].ix[index][:]) + temp = self.Data2['temp'].ix[index] if 'temp' in self.Data2 else None + pres = self.Data2['pres'].ix[index] if 'pres' in self.Data2 else None + for Sim in self.Simulations[index]: + simd = os.path.join(os.getcwd(), index, Sim.name) + GoInto(simd) + # Submit or run the simulation if the result file does not exist. + if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): + # Write to disk: Force field object, current parameter values, target options + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,self.OptionDict),f) + M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] + M.write("%s%s" % (Sim.stype, self.crdsfx[0])) + # # Get relevant files from the target folder, I suppose. + # link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) + # # Determine initial coordinates. + # self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] + # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) + # Command for running the simulation. 
+ cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.stype)] + if temp != None: + cmdlist.append('-T %f' % float(temp)) + if pres != None: + cmdlist.append('-P %f' % float(pres)) + cmdstr = ' '.join(cmdlist) + print cmdstr + # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) + # if wq == None: + # logger.info("Running condensed phase simulation locally.\n") + # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) + # _exec(cmdstr, copy_stderr=True, outfnm='npt.out') + # else: + # queue_up(wq, command = cmdstr+' &> npt.out', + # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], + # output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) + os.chdir(cwd) + return + # Set up and run the simulation chain on all points. for pt in self.points: # Create subdir @@ -1254,31 +1077,43 @@ class Simulation(object): type, initial condition). """ - def __init__(self, index, stype, initial, iframe, tsnames): + def __init__(self, name, index, stype, initial, iframe, tsnames): + # The simulation name will identify the simulation within a collection + # belonging to the Index. + self.name = name + # The Index that the simulation belongs to. self.index = index + # The type of simulation (liquid, gas, solid, bilayer...) self.stype = stype + # The file containing initial coordinates. self.initial = initial + # The frame number in the initial coordinate file. self.iframe = iframe + # The time series for the simulation. 
self.timeseries = OrderedDict([(i, []) for i in tsnames]) def __str__(self): msg = [] - if self.temperature is None: - msg.append("State: Unknown.") - elif self.pressure is None: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K.") - else: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K and " + - str(self.pressure) + " bar.") - - msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) - for key in self.ref: - msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.stype)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) + # if self.temperature is None: + # msg.append("State: Unknown.") + # elif self.pressure is None: + # msg.append("State: Point " + str(self.idnr) + " at " + + # str(self.temperature) + " K.") + # else: + # msg.append("State: Point " + str(self.idnr) + " at " + + # str(self.temperature) + " K and " + + # str(self.pressure) + " bar.") + + # msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) + # for key in self.ref: + # msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) - msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) - for key in self.data: - msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) + # msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) + # for key in self.data: + # msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) - return "\n".join(msg) + # return "\n".join(msg) diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 275265680..6cd1bec2c 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -68,7 +68,17 @@ type Thermo_GMX weight 1.0 source expset.txt quantities density 
h_vap -n_sim_chain 2 +simulations liquid gas +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_TAB +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +observables al scd kappa md_steps 100000 eq_steps 50000 $end diff --git a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt index c67aece18..2001d85b1 100644 --- a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt +++ b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt @@ -1,6 +1,6 @@ Index T P Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -50C 323.15 1 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 +50C 1 0.631 1 C15 C34 1 58 1 10 + 323.15 C17 0.198144 C36 0.198144 C18 0.198128 C37 0.198128 C19 0.198111 C38 0.198111 C20 0.198095 C39 0.198095 From df46c9a5e02e717f0d6158a3d9df4d53cdd1dbdc Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Sun, 20 Apr 2014 16:48:11 -0700 Subject: [PATCH 13/25] A few changes for energy/force matching and Q-Chem output parsing (cherry pick over to main). 
--- src/abinitio.py | 36 ++++++++++++++++++++++++++++++------ src/molecule.py | 5 +++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/abinitio.py b/src/abinitio.py index b6844cd51..7d288e18c 100644 --- a/src/abinitio.py +++ b/src/abinitio.py @@ -446,28 +446,34 @@ def read_reference_data(self): self.ntq = 0 def indicate(self): - Headings = ["Observable", "Difference\n(Calc-Ref)", "Denominator\n RMS (Ref)", " Percent \nDifference", "Weight"] + Headings = ["Observable", "Difference\n(Calc-Ref)", "Denominator\n RMS (Ref)", " Percent \nDifference", "Weight", "Contribution"] Data = OrderedDict([]) if self.energy: Data['Energy (kJ/mol)'] = ["%8.4f" % self.e_err, "%8.4f" % self.e_ref, "%.4f%%" % (self.e_err_pct*100), - "%.3f" % self.w_energy] + "%.3f" % self.w_energy, + "%8.4f" % self.e_ctr] if self.force: Data['Gradient (kJ/mol/A)'] = ["%8.4f" % (self.f_err/10), "%8.4f" % (self.f_ref/10), "%.4f%%" % (self.f_err_pct*100), - "%.3f" % self.w_force] + "%.3f" % self.w_force, + "%8.4f" % self.f_ctr] if self.use_nft: Data['Net Force (kJ/mol/A)'] = ["%8.4f" % (self.nf_err/10), "%8.4f" % (self.nf_ref/10), "%.4f%%" % (self.nf_err_pct*100), - "%.3f" % self.w_netforce] + "%.3f" % self.w_netforce, + "%8.4f" % self.nf_ctr] Data['Torque (kJ/mol/rad)'] = ["%8.4f" % self.tq_err, "%8.4f" % self.tq_ref, "%.4f%%" % (self.tq_err_pct*100), - "%.3f" % self.w_torque] + "%.3f" % self.w_torque, + "%8.4f" % self.tq_ctr] self.printcool_table(data=Data, headings=Headings, color=0) + if self.force: + logger.info("Maximum force error on atom %i (%s), frame %i, %8.4f kJ/mol/A\n" % (self.maxfatom, self.mol.elem[self.maxfatom], self.maxfshot, self.maxdf/10)) def energy_all(self): if hasattr(self, 'engine'): @@ -710,6 +716,10 @@ def callM(mvals_): return self.energy_force_transform() for p in self.pgrad: dM_all[:,p,:], ddM_all[:,p,:] = f12d3p(fdwrap(callM, mvals, p), h = self.h, f0 = M_all) + if self.force and not in_fd(): + self.maxfatom = -1 + self.maxfshot = -1 + self.maxdf = 0.0 
for i in range(NS): if i % 100 == 0: logger.debug("\rIncrementing quantities for snapshot %i\r" % i) @@ -740,7 +750,12 @@ def callM(mvals_): # Increment the average values. a = 1 if self.force: - dfrcarray = np.mean(np.array([np.linalg.norm(M[a+3*j:a+3*j+3] - Q[a+3*j:a+3*j+3]) for j in range(nat)])) + dfrcarray_ = np.array([np.linalg.norm(M[a+3*j:a+3*j+3] - Q[a+3*j:a+3*j+3]) for j in range(nat)]) + if not in_fd() and np.max(dfrcarray_) > self.maxdf: + self.maxdf = np.max(dfrcarray_) + self.maxfatom = np.argmax(dfrcarray_) + self.maxfshot = i + dfrcarray = np.mean(dfrcarray_) qfrcarray = np.mean(np.array([np.linalg.norm(Q[a+3*j:a+3*j+3]) for j in range(nat)])) dF_M += P*dfrcarray dF_Q += R*dfrcarray @@ -1035,17 +1050,26 @@ def callM(mvals_): dTfrac = MBP * dT_M / qT_M + QBP * dT_Q / qT_Q # Save values to qualitative indicator if not inside finite difference code. if not in_fd(): + # Contribution from energy and force parts. + self.e_ctr = (MBP * weighted_variance(np.array([SPiXi[0]]),np.array([WCiW[0]]),Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) + + QBP * weighted_variance(np.array([SRiXi[0]]),np.array([WCiW[0]]),Y,X0_Q,X0_Q,NCP1,subtract_mean = not self.absolute)) self.e_ref = MBP * np.sqrt(QQ_M[0]/Z - Q0_M[0]**2/Z/Z) + QBP * np.sqrt((QQ_Q[0]/Y - Q0_Q[0]**2/Y/Y)) self.e_err = dE self.e_err_pct = dEfrac if self.force: + self.f_ctr = (MBP * weighted_variance(SPiXi[1:1+3*nat],WCiW[1:1+3*nat],Z,X0_M,X0_M,NCP1,subtract_mean = False) + + QBP * weighted_variance(SRiXi[1:1+3*nat],WCiW[1:1+3*nat],Y,X0_Q,X0_Q,NCP1,subtract_mean = False)) self.f_ref = qF self.f_err = dF self.f_err_pct = dFfrac if self.use_nft: + self.nf_ctr = (MBP * weighted_variance(SPiXi[1+3*nat:1+3*nat+3*nnf],WCiW[1+3*nat:1+3*nat+3*nnf],Z,X0_M,X0_M,NCP1,subtract_mean = False) + + QBP * weighted_variance(SRiXi[1+3*nat:1+3*nat+3*nnf],WCiW[1+3*nat:1+3*nat+3*nnf],Y,X0_Q,X0_Q,NCP1,subtract_mean = False)) self.nf_ref = qN self.nf_err = dN self.nf_err_pct = dNfrac + self.tq_ctr = (MBP * 
weighted_variance(SPiXi[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],WCiW[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],Z,X0_M,X0_M,NCP1,subtract_mean = False) + + QBP * weighted_variance(SRiXi[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],WCiW[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],Y,X0_Q,X0_Q,NCP1,subtract_mean = False)) self.tq_ref = qT self.tq_err = dT self.tq_err_pct = dTfrac diff --git a/src/molecule.py b/src/molecule.py index 2f3ccbd34..2e17bcc05 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -2454,6 +2454,11 @@ def read_qcout(self, fnm, errok = [], **kwargs): sline = line.split() mkchgThis.append(float(sline[2])) mkspnThis.append(float(sline[3])) + elif re.match("^[0-9]+ +[A-Z][a-z]?( +[-+]?([0-9]*\.)?[0-9]+){1}$", line): + MMode = 2 + sline = line.split() + mkchgThis.append(float(sline[2])) + mkspnThis.append(0.0) elif MMode == 2: # Break out of the loop if we encounter anything other than Mulliken charges mkchg.append(mkchgThis[:]) mkspn.append(mkspnThis[:]) From adc1ff4b0f305919bd7a24b2e6eec7f47533c102 Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Mon, 21 Apr 2014 09:34:47 -0700 Subject: [PATCH 14/25] Improvements for energy/force and frequency matching --- src/abinitio.py | 24 +++++++++++++++++++++--- src/molecule.py | 2 ++ src/parser.py | 2 +- src/vibration.py | 18 +++++++++++++----- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/abinitio.py b/src/abinitio.py index 7d288e18c..79ff246ba 100644 --- a/src/abinitio.py +++ b/src/abinitio.py @@ -6,7 +6,7 @@ import os import shutil -from forcebalance.nifty import col, eqcgmx, flat, floatornan, fqcgmx, invert_svd, kb, printcool, bohrang, warn_press_key, warn_once +from forcebalance.nifty import col, eqcgmx, flat, floatornan, fqcgmx, invert_svd, kb, printcool, bohrang, warn_press_key, warn_once, pvec1d import numpy as np from forcebalance.target import Target from forcebalance.molecule import Molecule, format_xyz_coord @@ -445,6 +445,10 @@ def read_reference_data(self): self.nnf = 0 self.ntq = 0 + # Normalize Boltzmann 
weights. + self.boltz_wts /= sum(self.boltz_wts) + self.qmboltz_wts /= sum(self.qmboltz_wts) + def indicate(self): Headings = ["Observable", "Difference\n(Calc-Ref)", "Denominator\n RMS (Ref)", " Percent \nDifference", "Weight", "Contribution"] Data = OrderedDict([]) @@ -688,6 +692,8 @@ def get_energy_force(self, mvals, AGrad=False, AHess=False): # Objective functions SPiXi = np.zeros(NCP1) SRiXi = np.zeros(NCP1) + # Debug: Store all objective function contributions + XiAll = np.zeros((NS, NCP1)) if AGrad: SPiXi_p = np.zeros((NP,NCP1)) SRiXi_p = np.zeros((NP,NCP1)) @@ -792,6 +798,7 @@ def callM(mvals_): Xi = np.outer(M,M) - 2*np.outer(Q,M) + np.outer(Q,Q) else: Xi = X**2 + XiAll[i] = Xi.copy() SPiXi += P * Xi SRiXi += R * Xi #==============================================================# @@ -972,8 +979,11 @@ def callM(mvals_): MBP = 1 - self.qmboltz C = MBP*(QQ_M-Q0_M*Q0_M/Z)/Z + QBP*(QQ_Q-Q0_Q*Q0_Q/Y)/Y # Normalize the force components - for i in range(1, len(C), 3): - C[i:i+3] = np.mean(C[i:i+3]) + # Normalize by atom? + # for i in range(1, len(C), 3): + # C[i:i+3] = np.mean(C[i:i+3]) + # Or normalize all forces? + C[1:len(C)] = np.mean(C[1:len(C)]) Ci = 1. / C WCiW = WM * Ci * WM #==============================================================# @@ -987,6 +997,14 @@ def callM(mvals_): else: X2_M = weighted_variance(SPiXi,WCiW,Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) X2_Q = weighted_variance(SRiXi,WCiW,Y,X0_Q,X0_Q,NCP1,subtract_mean = not self.absolute) + # Print out all energy / force contributions, useful for debugging. 
+ # for i in range(XiAll.shape[0]): + # efctr = weighted_variance(XiAll[i],WCiW,Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) + # WCiW1 = WCiW.copy() + # for j in range(1, len(WCiW1)): + # WCiW1[j] = 0.0 + # ectr = weighted_variance(XiAll[i],WCiW1,Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) + # print i, "ectr = %.3f efctr = %.3f" % (ectr, efctr) for p in self.pgrad: if not AGrad: continue X2_M_p[p] = weighted_variance(SPiXi_p[p],WCiW,Z,2*X0_M,M0_M_p[p],NCP1,subtract_mean = not self.absolute) diff --git a/src/molecule.py b/src/molecule.py index 2e17bcc05..f423ec254 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -1036,6 +1036,8 @@ def write(self,fnm=None,ftype=None,append=False,select=None,**kwargs): elif ftype == None: ftype = os.path.splitext(fnm)[1][1:] ## Fill in comments. + if 'comms' not in self.Data: + self.comms = ['Generated by ForceBalance from %s: Frame %i of %i' % (fnm, i+1, self.ns) for i in range(self.ns)] if 'xyzs' in self.Data and len(self.comms) < len(self.xyzs): for i in range(len(self.comms), len(self.xyzs)): self.comms.append("Frame %i: generated by ForceBalance" % i) diff --git a/src/parser.py b/src/parser.py index b21a39214..27e731bc0 100644 --- a/src/parser.py +++ b/src/parser.py @@ -194,7 +194,7 @@ "absolute" : (0, -150, 'When matching energies in AbInitio, do not subtract the mean energy gap.', 'Energy matching (advanced usage)', 'abinitio'), "cauchy" : (0, 0, 'Normalize interaction energies each using 1/(denom**2 + reference**2) which resembles a Cauchy distribution', 'Interaction energy targets', 'interaction'), "attenuate" : (0, 0, 'Normalize interaction energies using 1/(denom**2 + reference**2) only for repulsive interactions greater than denom.', 'Interaction energy targets', 'interaction'), - "normalize" : (0, -150, 'Divide interaction energy objective by the number of snapshots', 'Interaction energy targets', 'interaction'), + "normalize" : (0, -150, 'Divide objective function by the number of snapshots / 
vibrations', 'Interaction energy / vibrational mode targets', 'interaction, vibration'), "manual" : (0, -150, 'Give the user a chance to fill in condensed phase stuff on the zeroth step', 'Condensed phase property targets (advanced usage)', 'liquid'), "hvap_subaverage" : (0, -150, 'Don\'t target the average enthalpy of vaporization and allow it to freely float (experimental)', 'Condensed phase property targets (advanced usage)', 'liquid'), "force_cuda" : (0, -150, 'Force the external npt.py script to crash if CUDA Platform not available', 'Condensed phase property targets (advanced usage)', 'liquid_openmm'), diff --git a/src/vibration.py b/src/vibration.py index 3b8127d40..0a1e98928 100644 --- a/src/vibration.py +++ b/src/vibration.py @@ -45,6 +45,7 @@ def __init__(self,options,tgt_opts,forcefield): #======================================# self.set_option(tgt_opts,'wavenumber_tol','denom') self.set_option(tgt_opts,'reassign_modes','reassign') + self.set_option(tgt_opts,'normalize') #======================================# # Variables which are set here # @@ -105,7 +106,7 @@ def read_reference_data(self): def indicate(self): """ Print qualitative indicator. 
""" - # if self.reassign == 'overlap' : count_assignment(self.c2r) + if self.reassign == 'overlap' : count_assignment(self.c2r) banner = "Frequencies (wavenumbers)" headings = ["Mode #", "Reference", "Calculated", "Difference", "Ref(dot)Calc"] data = OrderedDict([(i, [self.ref_eigvals[i], self.calc_eigvals[i], self.calc_eigvals[i] - self.ref_eigvals[i], "%.4f" % self.overlaps[i]]) for i in range(len(self.ref_eigvals))]) @@ -154,14 +155,21 @@ def get_eigvals(mvals_): self.FF.make(mvals_) eigvals, eigvecs = self.vibration_driver() eigvecs_nrm, eigvecs_nrm_mw = self.process_vectors(eigvecs) + # The overlap metric may take into account some frequency differences + dev = np.array([[(np.abs(i-j)/1000)/(1.0+np.abs(i-j)/1000) for j in self.ref_eigvals] for i in eigvals]) + for i in range(dev.shape[0]): + dev[i, :] /= max(dev[i, :]) + if self.reassign in ['permute', 'overlap']: - a = np.array([[int(1e6*(1.0-np.dot(v1.flatten(),v2.flatten())**2)) for v2 in self.ref_eigvecs_nrm] for v1 in eigvecs_nrm_mw]) # In the matrix that we constructed, these are the column numbers (reference mode numbers) # that are mapped to the row numbers (calculated mode numbers) if self.reassign == 'permute': + a = np.array([[int(1e6*(1.0-np.dot(v1.flatten(),v2.flatten())**2)) for v2 in self.ref_eigvecs_nrm] for v1 in eigvecs_nrm_mw]) c2r = Assign(a) eigvals = eigvals[c2r] elif self.reassign == 'overlap': + a = np.array([[(1.0-np.dot(v1.flatten(),v2.flatten())**2) for v2 in self.ref_eigvecs_nrm] for v1 in eigvecs_nrm_mw]) + a += dev c2r = np.argmin(a, axis=0) eigvals_p = [] for j in c2r: @@ -185,11 +193,11 @@ def get_eigvals(mvals_): if AGrad or AHess: for p in self.pgrad: dV[p,:], _ = f12d3p(fdwrap(get_eigvals, mvals, p), h = self.h, f0 = calc_eigvals) - Answer['X'] = np.dot(D,D) / self.denom**2 + Answer['X'] = np.dot(D,D) / self.denom**2 / (len(D) if self.normalize else 1) for p in self.pgrad: - Answer['G'][p] = 2*np.dot(D, dV[p,:]) / self.denom**2 + Answer['G'][p] = 2*np.dot(D, dV[p,:]) / 
self.denom**2 / (len(D) if self.normalize else 1) for q in self.pgrad: - Answer['H'][p,q] = 2*np.dot(dV[p,:], dV[q,:]) / self.denom**2 + Answer['H'][p,q] = 2*np.dot(dV[p,:], dV[q,:]) / self.denom**2 / (len(D) if self.normalize else 1) if not in_fd(): self.calc_eigvals = calc_eigvals self.objective = Answer['X'] From 7726de98fec7e5ab8bbe7ee69730bfd445bf7542 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 21 Apr 2014 17:16:03 -0700 Subject: [PATCH 15/25] Work in progress --- src/data/md_one.py | 268 +++++++++++++++++++++++++++++++++++++++++++++ src/nifty.py | 26 ++--- src/thermo.py | 93 ++++++++++------ 3 files changed, 340 insertions(+), 47 deletions(-) create mode 100644 src/data/md_one.py diff --git a/src/data/md_one.py b/src/data/md_one.py new file mode 100644 index 000000000..b52a87d9b --- /dev/null +++ b/src/data/md_one.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python + +""" +md_one +======== + +This script is a part of ForceBalance and runs a single simulation +that may be combined with others to calculate general thermodynamic +properties. + +This script is meant to be launched automatically by ForceBalance. + +""" + +#==================# +#| Global Imports |# +#==================# + +import os +import argparse +import numpy as np +import importlib as il + +from forcebalance.nifty import lp_dump, lp_load, wopen +from forcebalance.nifty import printcool, printcool_dictionary +from forcebalance.molecule import Molecule + +from collections import OrderedDict + +from forcebalance.output import getLogger +logger = getLogger(__name__) + +#========================================================# +#| Global, user-tunable variables (simulation settings) |# +#========================================================# + +# Note: Only the simulation settings that vary across different +# simulations in a target may be specified on the command line. 
+ +parser = argparse.ArgumentParser() +parser.add_argument('simulation', type=str, + help='The simulation name (important; used in setting up)') +parser.add_argument('-T', type=float, default=None, + help='Simulation temperature, leave blank for constant energy') +parser.add_argument('-P', type=float, default=None, + help='Simulation pressure, leave blank for constant volume') +parser.add_argument('-g', action='store_true', + help='Calculate gradients for output time series') + +# These settings may be specified for debugging purposes (i.e. they +# will override what we read from forcebalance.p) +parser.add_argument('--nequil', type=int, default=0, + help='Number of steps for equilibration run (leave blank to use default from forcebalance.p)') +parser.add_argument('--nsteps', type=int, default=0, + help='Number of steps for production run (leave blank to use default from forcebalance.p)') +parser.add_argument('--timestep', type=float, default=0.0, + help='Time step in femtoseconds (leave blank to use default from forcebalance.p)') +parser.add_argument('--interval', type=float, default=0.0, + help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') +parser.add_argument('--outputs', type=list, nargs='+', + help='Specify the time series which are written to disk') + +args = parser.parse_args() + +def main(): + + """Usage: + + (prefix.sh) md_one.py + -T + -P + -g (if gradients of output timeseries are desired) + [Debugging Options Below] + --nequil + --nsteps + --outputs + + This program is meant to be called automatically by ForceBalance + because most options are loaded from the 'forcebalance.p' input + file. 
+ + """ + + printcool("ForceBalance simulation using engine: %s" % engname.upper(), + color=4, bold=True) + #---- + # Load the ForceBalance pickle file which contains: + #---- + # - Force field object + # - Optimization parameters + # - Options from the Target object that launched this simulation + FF, mvals, TgtOptions = lp_load(open('forcebalance.p')) + FF.ffdir = '.' + # Write the force field file. + FF.make(mvals) + # Switch for calculating gradients of output time series. + AGrad = args.g + + #---- + # Load the options that are set in the ForceBalance input file. + #---- + # Finite difference step size + h = TgtOptions['h'] + # Active parameters for gradient (if we filtered out the + # parameters that are known to have no effect) + pgrad = TgtOptions['pgrad'] + # MD options; time step (fs), production steps, equilibration steps, interval for saving data (ps) + timestep = args.timestep if args.timestep > 0 else SimOptions['timestep'] + nsteps = args.nsteps if args.nsteps > 0 else SimOptions['nsteps'] + nequil = args.nequil if args.nequil > 0 else SimOptions['nequil'] + intvl = args.intvl if args.intvl > 0 else SimOptions['interval'] + fnm = SimOptions['coords'] + if not fnm.startswith(args.simulation): + logger.error("Problem with SimOptions['coords'] (%s):\n" % fnm) + logger.error("Coordinate file must be consistent with simulation type (%s)\n" % args.simulation) + + # Number of threads, multiple timestep integrator, anisotropic box etc. 
+ threads = SimOptions.get('md_threads', 1) + mts = SimOptions.get('mts_integrator', 0) + rpmd_beads = SimOptions.get('rpmd_beads', 0) + force_cuda = SimOptions.get('force_cuda', 0) + nbarostat = SimOptions.get('n_mcbarostat', 25) + anisotropic = SimOptions.get('anisotropic_box', 0) + minimize = SimOptions.get('minimize_energy', 1) + + + + #---- + # Setting up MD simulations + #---- + EngOpts = OrderedDict() + EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) + EngOpts["gas"] = OrderedDict([("coords", gas_fnm), ("mol", MG), ("pbc", False)]) + GenOpts = OrderedDict([('FF', FF)]) + if engname == "openmm": + # OpenMM-specific options + EngOpts["liquid"]["platname"] = 'CUDA' + EngOpts["gas"]["platname"] = 'Reference' + if force_cuda: + try: Platform.getPlatformByName('CUDA') + except: raise RuntimeError('Forcing failure because CUDA platform unavailable') + if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") + elif engname == "gromacs": + # Gromacs-specific options + GenOpts["gmxpath"] = TgtOptions["gmxpath"] + GenOpts["gmxsuffix"] = TgtOptions["gmxsuffix"] + EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" + EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" + EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" + EngOpts["gas"]["gmx_mdp"] = os.path.splitext(gas_fnm)[0] + ".mdp" + if force_cuda: logger.warn("force_cuda option has no effect on Gromacs engine.") + if rpmd_beads > 0: raise RuntimeError("Gromacs cannot handle RPMD.") + if mts: logger.warn("Gromacs not configured for multiple timestep integrator.") + if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") + elif engname == "tinker": + # Tinker-specific options + GenOpts["tinkerpath"] = TgtOptions["tinkerpath"] + EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" + EngOpts["gas"]["tinker_key"] = 
os.path.splitext(gas_fnm)[0] + ".key" + if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") + if rpmd_beads > 0: raise RuntimeError("TINKER cannot handle RPMD.") + if mts: logger.warn("Tinker not configured for multiple timestep integrator.") + EngOpts["liquid"].update(GenOpts) + EngOpts["gas"].update(GenOpts) + for i in EngOpts: + printcool_dictionary(EngOpts[i], "Engine options for %s" % i) + + # Set up MD options + MDOpts = OrderedDict() + MDOpts["liquid"] = OrderedDict([("nsteps", liquid_nsteps), ("timestep", liquid_timestep), + ("temperature", temperature), ("pressure", pressure), + ("nequil", liquid_nequil), ("minimize", minimize), + ("nsave", int(1000 * liquid_intvl / liquid_timestep)), + ("verbose", True), ('save_traj', TgtOptions['save_traj']), + ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), + ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) + MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), + ("temperature", temperature), ("nsave", int(1000 * gas_intvl / gas_timestep)), + ("nequil", gas_nequil), ("minimize", minimize), ("threads", 1), ("mts", mts), + ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) + + + engines = [] + ## Setup and carry out simulations in chain + for i in range(args.length): + # Simulation files + if engname == "gromacs": + ndx_flag = False + coords = args.name + str(i+1) + ".gro" + top_file = args.name + str(i+1) + ".top" + mdp_file = args.name + str(i+1) + ".mdp" + ndx_file = args.name + str(i+1) + ".ndx" + if os.path.exists(ndx_file): + ndx_flag = True + + mol = Molecule(coords) + #---- + # Set coordinates and molecule for engine + #---- + EngOpts = OrderedDict([("FF", FF), + ("pbc", True), + ("coords", coords), + ("mol", mol)]) + + if engname == "gromacs": + # Gromacs-specific options + EngOpts["gmx_top"] = top_file + EngOpts["gmx_mdp"] = mdp_file + if ndx_flag: + EngOpts["gmx_ndx"] = ndx_file + + 
printcool_dictionary(EngOpts) + + # Create engine objects and store them for subsequent analysis. + s = Engine(name=args.name+str(i+1), **EngOpts) + + #=====================# + # Run the simulation. # + #=====================# + MDOpts = OrderedDict([("nsteps", args.nsteps), + ("nequil", args.nequil)]) + + printcool("Molecular dynamics simulation", color=4, bold=True) + s.md(verbose=True, **MDOpts) + + engines.append(s) + + #======================================================================# + # Extract the quantities of interest from the MD simulations and dump # + # the results to file. # + # =====================================================================# + results = OrderedDict() + for q in args.quantities: + logger.info("Extracting %s...\n" % q) + + # Initialize quantity + objstr = "Quantity_" + q.capitalize() + dm = il.import_module('..quantity', + package='forcebalance.quantity') + + Quantity = getattr(dm, objstr)(engname, args.temperature, args.pressure) + + Q, Qerr, Qgrad = Quantity.extract(engines, FF, mvals, h, pgrad, AGrad) + + results.setdefault("values", []).append(Q) + results.setdefault("errors", []).append(Qerr) + results.setdefault("grads", []).append(Qgrad) + + logger.info("Finished!\n") + + # Print out results for the quantity and its derivative. 
+ Sep = printcool(("%s: % .4f +- % .4f \nAnalytic Derivative:" + % (q.capitalize(), Q, Qerr))) + FF.print_map(vals=Qgrad) + + # Dump results to file + logger.info("Writing final force field.\n") + pvals = FF.make(mvals) + + logger.info("Writing all simulation data to disk.\n") + with wopen('md_result.p') as f: + lp_dump((np.asarray(results["values"]), + np.asarray(results["errors"]), + np.asarray(results["grads"])), f) + +if __name__ == "__main__": + main() + diff --git a/src/nifty.py b/src/nifty.py index e0ff154a5..d51999d1e 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -237,11 +237,17 @@ def magic_string(str): #===============================# #| Math: Variable manipulation |# #===============================# -def isnan(var): - """ Attempt to see if the given variable is np.nan. """ - if isinstance(var, float): +def isnpnan(var): + """ + + Determine whether a variable is np.nan. I wrote this function + because np.isnan would crash if we use it on a dtype that is not + np.float + + """ + if any([isinstance(var, x) for x in [float, np.float, np.float32, np.float64, np.double]]): return np.isnan(var) - return False + else: return False def isint(word): """ONLY matches integers! If you have a decimal point? None shall pass! @@ -285,18 +291,6 @@ def floatornan(word): logger.info("Setting %s to % .1e\n" % big) return big -def isnpnan(var): - """ - - Determine whether a variable is np.nan. I wrote this function - because np.isnan would crash if we use it on a dtype that is not - np.float - - """ - if type(var) in [np.float, np.float32, np.float64, np.double]: - return np.isnan(var) - return False - def col(vec): """ Given any list, array, or matrix, return a 1-column matrix. 
diff --git a/src/thermo.py b/src/thermo.py index 7e80fdfab..71bb681ea 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -532,7 +532,7 @@ def reffld_error(reason=''): def intcol(col): if col in self.Data.columns: for idx in self.Data.index: - if not isnan(self.Data[col][idx]): + if not isnpnan(self.Data[col][idx]): self.Data[col][idx] = int(self.Data[col][idx]) def floatcol(col): @@ -571,36 +571,61 @@ def floatcol(col): return - def find_ic(self, index, stype, icn): + def find_file(self, index, stype, sufs, icn): """ - Search for a suitable initial condition file. + Search for a suitable file that matches the simulation index, + type, suffix and IC number. This can be used to search for + initial coordinates, but also auxiliary files for the + simulation (e.g. .top and .mdp files for a Gromacs simulation, + or .key files for a Tinker simulation.) + + Generally, it is preferred to provide files where the base + name matches the simulation type. However, since it is also + okay to put all files for a simulation type into a + subdirectory, generic file names like 'topol' and 'conf' may + be used. 
- Initial condition files will be searched for in the following priority: - targets/target_name/index/stype/ICs/stype_#.xyz - targets/target_name/index/stype/ICs/stype#.xyz - targets/target_name/index/stype/ICs/#.xyz - targets/target_name/index/stype/ICs/stype.xyz - targets/target_name/index/stype/ICs/coords.xyz - targets/target_name/index/stype/stype.xyz - targets/target_name/index/stype/coords.xyz - targets/target_name/index/stype.xyz - targets/target_name/stype.xyz + Initial condition files will be searched for in the following priority (suf stands for suffix) + targets/target_name/index/stype/ICs/stype_#.suf + targets/target_name/index/stype/ICs/stype#.suf + targets/target_name/index/stype/ICs/#.suf + targets/target_name/index/stype/ICs/stype.suf + targets/target_name/index/stype/ICs/coords.suf + targets/target_name/index/stype/ICs/conf.suf + targets/target_name/index/stype/ICs/topol.suf + targets/target_name/index/stype/ICs/grompp.suf + targets/target_name/index/stype/ICs/input.suf + targets/target_name/index/stype/ICs/tinker.suf + targets/target_name/index/stype/stype.suf + targets/target_name/index/stype/coords.suf + targets/target_name/index/stype.suf + targets/target_name/stype.suf + + @param[in] index Name of the index directory to look in + @param[in] stype Name of the simulation type to look for + @param[in] sufs List of suffixes to look for in order of priority + @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) """ found = '' - basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', stype), False), - (os.path.join(index, stype, 'ICs', 'coords'), False), - (os.path.join(index, stype, stype), False), - (os.path.join(index, stype, 'coords'), False), - (os.path.join(index, stype), False), - 
(os.path.join(stype), False)] - paths = [] + # The 2-tuple here corresponds to: + # - Search path for the file + # - Whether the file that we're looking for is 'numbered' + # (i.e. a different file for each structure); otherwise the + # single file may contain multiple structures + pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] + + basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx), False), + (os.path.join(index, stype, pfx), False), + (os.path.join(index, pfx), False), + (os.path.join(pfx), False)] for pfx in pfxs])) + + paths = OrderedDict() for fnm, numbered in basefnms: - for crdsfx in self.crdsfx: - fpath = os.path.join(self.tgtdir, fnm+crdsfx) - paths.append(fpath) + for suf in sufs: + fpath = os.path.join(self.tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) + paths[fpath] = os.path.exists(fpath) if os.path.exists(fpath): if found != '': logger.info('Target %s Index %s Simulation %s : ' @@ -610,12 +635,15 @@ def find_ic(self, index, stype, icn): M = Molecule(fpath) if len(M) <= icn: logger.error("Target %s Index %s Simulation %s : " - "initial coordinate file %s doesn't have enough structures\n" % + "file %s doesn't have enough structures\n" % (self.name, index, stype, fpath)) raise RuntimeError logger.info('Target %s Index %s Simulation %s : ' - 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) + 'found file %s\n' % (self.name, index, stype, fpath)) found = fpath + if found == '': + logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) + raise RuntimeError return found, 0 if numbered else icn def initialize_observables(self): @@ -685,15 +713,16 @@ def initialize_simulations(self): for index in self.Indices: for stype, tsset in SimTS.items(): - if 'n_ic' in self.Data.ix[index]: - 
n_ic = self.Data.ix[index]['n_ic'] + if 'n_ic' in self.Data2.ix[index]: + n_ic = self.Data2.ix[index]['n_ic'] + print n_ic if n_ic < 1: logger.error("n_ic must >= 1") raise RuntimeError else: n_ic = 1 for icn in range(n_ic): - icfnm, icframe = self.find_ic(index, stype, icn) + icfnm, icframe = self.find_file(index, stype, self.crdsfx, icn) sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype self.Simulations[index].append(Simulation(sname, index, stype, icfnm, icframe, sorted(list(tsset)))) @@ -747,6 +776,8 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): cmdlist.append('-T %g' % float(temp)) if pres != None: cmdlist.append('-P %g' % float(pres)) + if AGrad or AHess: + cmdlist.append('-g') cmdstr = ' '.join(cmdlist) print cmdstr # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) From 42eb6a209430231857779ab97143a6fe7ee1208a Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Tue, 22 Apr 2014 09:50:15 -0700 Subject: [PATCH 16/25] Reduce the amount of printout --- src/target.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/target.py b/src/target.py index cb79b6006..66011cec2 100644 --- a/src/target.py +++ b/src/target.py @@ -715,7 +715,7 @@ def submit_jobs(self, mvals, AGrad=False, AHess=False): wq = getWorkQueue() - logger.info("Sending target '%s' to work queue for remote evaluation\n" % self.name) + # logger.info("Sending target '%s' to work queue for remote evaluation\n" % self.name) # input: # forcebalance.p: pickled mvals, options, and forcefield # rtarget.py: remote target evaluation script @@ -726,7 +726,7 @@ def submit_jobs(self, mvals, AGrad=False, AHess=False): forcebalance.nifty.queue_up(wq, "python rtarget.py > rtarget.out 2>&1", ["forcebalance.p", "rtarget.py", "target.tar.bz2"], ['objective.p', 'indicate.log', 'rtarget.out'], - tgt=self) + tgt=self, verbose=False) def read(self,mvals,AGrad=False,AHess=False): return self.get(mvals, AGrad, AHess) From 
f7ab58546f391e3c07c76c4a552174939c41e710 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 22 Apr 2014 13:32:18 -0700 Subject: [PATCH 17/25] Work in progress --- src/data/md_one.py | 103 ++++++++++++++++++----------------- src/parser.py | 4 +- src/thermo.py | 40 ++++++++++++-- studies/004_thermo/single.in | 2 + 4 files changed, 92 insertions(+), 57 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index b52a87d9b..d0d956531 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -16,7 +16,7 @@ #| Global Imports |# #==================# -import os +import os, sys, re import argparse import numpy as np import importlib as il @@ -37,43 +37,51 @@ # Note: Only the simulation settings that vary across different # simulations in a target may be specified on the command line. +# ANYTHING THREE LETTERS OR BELOW IS A SHORT OPTION WITH ONE DASH! + parser = argparse.ArgumentParser() parser.add_argument('simulation', type=str, help='The simulation name (important; used in setting up)') -parser.add_argument('-T', type=float, default=None, +parser.add_argument('-T', '--temp', '--temperature', dest='temperature', type=float, help='Simulation temperature, leave blank for constant energy') -parser.add_argument('-P', type=float, default=None, +parser.add_argument('-P', '--pres', '--pressure', dest='pressure', type=float, help='Simulation pressure, leave blank for constant volume') -parser.add_argument('-g', action='store_true', +parser.add_argument('-g', '--grad', '--gradient', dest='gradient', action='store_true', help='Calculate gradients for output time series') # These settings may be specified for debugging purposes (i.e. 
they # will override what we read from forcebalance.p) -parser.add_argument('--nequil', type=int, default=0, +parser.add_argument('-eq', '--nequil', dest='nequil', type=int, help='Number of steps for equilibration run (leave blank to use default from forcebalance.p)') -parser.add_argument('--nsteps', type=int, default=0, +parser.add_argument('-md', '--nsteps', dest='nsteps', type=int, help='Number of steps for production run (leave blank to use default from forcebalance.p)') -parser.add_argument('--timestep', type=float, default=0.0, +parser.add_argument('-dt', '--timestep', dest='timestep', type=float, help='Time step in femtoseconds (leave blank to use default from forcebalance.p)') -parser.add_argument('--interval', type=float, default=0.0, +parser.add_argument('-sp', '--sample', dest='sample', type=float, + help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') +parser.add_argument('-nt', '--threads', dest='threads', type=int, help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') -parser.add_argument('--outputs', type=list, nargs='+', +parser.add_argument('-min', '--minimize', dest='minimize', action='store_true', + help='Whether to minimize the energy before starting the simulation') +parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', help='Specify the time series which are written to disk') -args = parser.parse_args() +args = vars(parser.parse_args()) +# args = dict([(i, j) for i, j in vars(parser.parse_args()).items() if j != None]) def main(): """Usage: (prefix.sh) md_one.py - -T - -P - -g (if gradients of output timeseries are desired) - [Debugging Options Below] - --nequil - --nsteps - --outputs + -T, --temperature + -P, --pressure + -g, --grad (if gradients of output timeseries are desired) + -o, --outputs + -eq, --nequil + -md, --nsteps + -dt, --timestep + -nt, --interval This program is meant to be called automatically by ForceBalance because most 
options are loaded from the 'forcebalance.p' input @@ -81,56 +89,50 @@ def main(): """ - printcool("ForceBalance simulation using engine: %s" % engname.upper(), - color=4, bold=True) + # printcool("ForceBalance simulation using engine: %s" % engname.upper(), + # color=4, bold=True) + #---- # Load the ForceBalance pickle file which contains: #---- # - Force field object # - Optimization parameters - # - Options from the Target object that launched this simulation - FF, mvals, TgtOptions = lp_load(open('forcebalance.p')) + # - Options loaded from file + FF, mvals, fopts = lp_load(open('forcebalance.p')) FF.ffdir = '.' # Write the force field file. FF.make(mvals) - # Switch for calculating gradients of output time series. - AGrad = args.g + # # Switch for calculating gradients of output time series. + # AGrad = args['gradient'] #---- - # Load the options that are set in the ForceBalance input file. + # Load some options from file #---- # Finite difference step size - h = TgtOptions['h'] + h = fopts['h'] # Active parameters for gradient (if we filtered out the # parameters that are known to have no effect) - pgrad = TgtOptions['pgrad'] - # MD options; time step (fs), production steps, equilibration steps, interval for saving data (ps) - timestep = args.timestep if args.timestep > 0 else SimOptions['timestep'] - nsteps = args.nsteps if args.nsteps > 0 else SimOptions['nsteps'] - nequil = args.nequil if args.nequil > 0 else SimOptions['nequil'] - intvl = args.intvl if args.intvl > 0 else SimOptions['interval'] - fnm = SimOptions['coords'] - if not fnm.startswith(args.simulation): - logger.error("Problem with SimOptions['coords'] (%s):\n" % fnm) - logger.error("Coordinate file must be consistent with simulation type (%s)\n" % args.simulation) + pgrad = fopts['pgrad'] - # Number of threads, multiple timestep integrator, anisotropic box etc. 
- threads = SimOptions.get('md_threads', 1) - mts = SimOptions.get('mts_integrator', 0) - rpmd_beads = SimOptions.get('rpmd_beads', 0) - force_cuda = SimOptions.get('force_cuda', 0) - nbarostat = SimOptions.get('n_mcbarostat', 25) - anisotropic = SimOptions.get('anisotropic_box', 0) - minimize = SimOptions.get('minimize_energy', 1) + printcool_dictionary(args) - + # Number of threads, multiple timestep integrator, anisotropic box etc. + threads = fopts.get('md_threads', 1) + mts = fopts.get('mts_integrator', 0) + rpmd_beads = fopts.get('rpmd_beads', 0) + force_cuda = fopts.get('force_cuda', 0) + nbarostat = fopts.get('n_mcbarostat', 25) + anisotropic = fopts.get('anisotropic_box', 0) + minimize = fopts.get('minimize_energy', 1) #---- # Setting up MD simulations #---- + EngOpts = OrderedDict() + EngOpts = OrderedDict([("coords", fopts['coords']), ("pbc", False)]) + EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) - EngOpts["gas"] = OrderedDict([("coords", gas_fnm), ("mol", MG), ("pbc", False)]) GenOpts = OrderedDict([('FF', FF)]) if engname == "openmm": # OpenMM-specific options @@ -142,8 +144,8 @@ def main(): if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") elif engname == "gromacs": # Gromacs-specific options - GenOpts["gmxpath"] = TgtOptions["gmxpath"] - GenOpts["gmxsuffix"] = TgtOptions["gmxsuffix"] + GenOpts["gmxpath"] = fopts["gmxpath"] + GenOpts["gmxsuffix"] = fopts["gmxsuffix"] EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" @@ -154,7 +156,7 @@ def main(): if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") elif engname == "tinker": # Tinker-specific options - GenOpts["tinkerpath"] = TgtOptions["tinkerpath"] + GenOpts["tinkerpath"] = fopts["tinkerpath"] 
EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" EngOpts["gas"]["tinker_key"] = os.path.splitext(gas_fnm)[0] + ".key" if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") @@ -166,12 +168,13 @@ def main(): printcool_dictionary(EngOpts[i], "Engine options for %s" % i) # Set up MD options - MDOpts = OrderedDict() + # These are used in the function call to molecular_dynamics() + MDOpts["liquid"] = OrderedDict([("nsteps", liquid_nsteps), ("timestep", liquid_timestep), ("temperature", temperature), ("pressure", pressure), ("nequil", liquid_nequil), ("minimize", minimize), ("nsave", int(1000 * liquid_intvl / liquid_timestep)), - ("verbose", True), ('save_traj', TgtOptions['save_traj']), + ("verbose", True), ('save_traj', fopts['save_traj']), ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), diff --git a/src/parser.py b/src/parser.py index f85ef4fb8..b424ae7dc 100644 --- a/src/parser.py +++ b/src/parser.py @@ -123,7 +123,7 @@ "adaptive_damping" : (0.5, 10, 'Damping factor that ties down the trust radius to trust0; decrease for a more variable step size.', 'Main Optimizer'), "error_tolerance" : (0.0, 10, 'Error tolerance; the optimizer will only reject steps that increase the objective function by more than this number.', 'Main Optimizer'), "search_tolerance" : (1e-4, -10, 'Search tolerance; used only when trust radius is negative, dictates convergence threshold of nonlinear search.', 'Main Optimizer with negative mintrust; advanced usage'), - "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', ['OPENMM','TINKER']) + "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', 
['OPENMM','TINKER']), }, 'sections': {"read_mvals" : (None, 100, 'Paste mathematical parameters into the input file for them to be read in directly', 'Restarting an optimization'), "read_pvals" : (None, 100, 'Paste physical parameters into the input file for them to be read in directly', 'Restarting an optimization (recommend use_mvals instead)'), @@ -239,6 +239,8 @@ "self_pol_mu0" : (0.0, -150, 'Gas-phase dipole parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'), "self_pol_alpha" : (0.0, -150, 'Polarizability parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'), "epsgrad" : (0.0, -150, 'Gradient below this threshold will be set to zero.', 'All targets'), + "timestep" : (1.0, 0, 'Time step for molecular dynamics (in femtoseconds).', 'Thermodynamic property targets', 'thermo'), + "interval" : (1.0, 0, 'Sampling interval for molecular dynamics (in picoseconds).', 'Thermodynamic property targets', 'thermo'), }, 'sections': {} } diff --git a/src/thermo.py b/src/thermo.py index 71bb681ea..8736bf1e6 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -369,9 +369,13 @@ def __init__(self, options, tgt_opts, forcefield): # Length of simulation chain self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run - self.set_option(tgt_opts, "eq_steps", forceprint=True) + self.set_option(tgt_opts, "eq_steps", "nequil", forceprint=True) # Number of time steps in the production run - self.set_option(tgt_opts, "md_steps", forceprint=True) + self.set_option(tgt_opts, "md_steps", "nsteps", forceprint=True) + # Time step (in femtoseconds) + self.set_option(tgt_opts, "timestep", forceprint=True) + # Sampling interval (in picoseconds) + self.set_option(tgt_opts, "interval", "sample", forceprint=True) ## Variables # Prefix names for simulation data @@ -384,6 +388,8 @@ def __init__(self, options, tgt_opts, forcefield): 
self.weights = {} # The list of simulations that we'll be running. self.SimNames = [i.lower() for i in self.user_simulation_names] + # Store the dictionary of allowed suffixes + self.OptionDict['crdsfx'] = self.crdsfx ## Read source data and initialize points; creates self.Data, self.Indices and self.Columns objects. self.read_source(os.path.join(self.root, self.tgtdir, self.source)) @@ -762,7 +768,6 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # Submit or run the simulation if the result file does not exist. if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): # Write to disk: Force field object, current parameter values, target options - with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,self.OptionDict),f) M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] M.write("%s%s" % (Sim.type, self.crdsfx[0])) # # Get relevant files from the target folder, I suppose. @@ -771,20 +776,43 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) # Command for running the simulation. + ## Copy run scripts from ForceBalance installation directory + + # We can build the entire MD options dictionary here!! + # Update dictionary with simulation options. 
+ OptionDict = copy.deepcopy(self.OptionDict) + OptionDict['gradient'] = AGrad + OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) + OptionDict['simtype'] = Sim.type + # # In the future we should have these settings + # OptionDict['nequil'] = self.nequil + # OptionDict['nsteps'] = self.nsteps + # OptionDict['timestep'] = self.timestep + # OptionDict['sample'] = self.sample + # OptionDict['minimize'] = self.minimize + printcool_dictionary(OptionDict) + + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,OptionDict),f) + for f in self.scripts: + LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), + os.path.join(os.getcwd(), f)) cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.type)] + #cmdlist.append('-eq %i -md %i -dt %g -sp %g' % (self.nequil, self.nsteps, self.timestep, self.sample)) if temp != None: cmdlist.append('-T %g' % float(temp)) if pres != None: cmdlist.append('-P %g' % float(pres)) - if AGrad or AHess: - cmdlist.append('-g') + # if AGrad or AHess: + # cmdlist.append('-g') + # cmdlist.append('-o') + # cmdlist += Sim.timeseries.keys() cmdstr = ' '.join(cmdlist) print cmdstr # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) # if wq == None: # logger.info("Running condensed phase simulation locally.\n") # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - # _exec(cmdstr, copy_stderr=True, outfnm='npt.out') + _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') # else: # queue_up(wq, command = cmdstr+' &> npt.out', # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 9fb288210..1ec7dbde5 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -71,4 +71,6 @@ quantities density h_vap simulations liquid gas md_steps 100000 eq_steps 50000 +interval 0.1 +timestep 2.0 $end From 88b1f15ca3f60b0c1e40fd4d84eefb5fb3db6faf Mon Sep 17 00:00:00 2001 From: 
leeping Date: Tue, 22 Apr 2014 17:28:48 -0700 Subject: [PATCH 18/25] Added simulation.py which contains simulation class (container for simulation settings) --- src/data/md_one.py | 105 ++++++++++++++++------ src/gmxio.py | 2 + src/simulation.py | 76 ++++++++++++++++ src/thermo.py | 213 +++++++++++++++++++++------------------------ 4 files changed, 255 insertions(+), 141 deletions(-) create mode 100644 src/simulation.py diff --git a/src/data/md_one.py b/src/data/md_one.py index d0d956531..9da428e84 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -66,8 +66,7 @@ parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', help='Specify the time series which are written to disk') -args = vars(parser.parse_args()) -# args = dict([(i, j) for i, j in vars(parser.parse_args()).items() if j != None]) +Copts = vars(parser.parse_args()) def main(): @@ -98,39 +97,93 @@ def main(): # - Force field object # - Optimization parameters # - Options loaded from file - FF, mvals, fopts = lp_load(open('forcebalance.p')) + FF, mvals, Fopts = lp_load(open('forcebalance.p')) FF.ffdir = '.' # Write the force field file. FF.make(mvals) - # # Switch for calculating gradients of output time series. - # AGrad = args['gradient'] + printcool_dictionary(Copts, title="Options from command line") + printcool_dictionary(Fopts, title="Options from file") + + # Read the command line options (they can override the options from file.) + # Calculate energy / dipole derivatives. + AGrad = Copts['gradient'] or Fopts['gradient'] + # Whether to minimize the energy. + minimize = Copts['minimize'] or Fopts['minimize'] + # Engine name. + engname = Fopts['engname'] + # + threads = Copts.get('threads', Fopts.get('threads', 1)) + + # # Get the temperature. + # temperature = Copts.get('temperature', Fopts.get('temperature', None)) + # # Get the pressure. 
+ # pressure = Copts.get('pressure', Fopts.get('pressure', None)) + # # + # nequil = Copts.get('nequil', Fopts.get('nequil')) + #---- - # Load some options from file + # load some options from file #---- # Finite difference step size - h = fopts['h'] - # Active parameters for gradient (if we filtered out the - # parameters that are known to have no effect) - pgrad = fopts['pgrad'] + h = Fopts['h'] + # Active parameters for taking the gradient + pgrad = Fopts['pgrad'] + # Name of the initial coordinate file + coords = Fopts['coords'] + # Base name of the initial coordinate file + cbase = os.path.splitext(coords)[0] + # Actually start to do stuff. + # Molecule object corresponding to + M = Molecule(coords) + + #---- + # Engine options + #---- + EngOpts = OrderedDict([("coords", coords), ("pbc", Fopts['pbc'])]) + if engname == "openmm": + if pbc: + EngOpts["platname"] = 'CUDA' + else: + EngOpts["platname"] = 'Reference' + # Force crash if asking for the CUDA platform and force_cuda option is on + # (because we don't want to inadvertently run using Reference platform) + if EngOpts["platname"] == 'CUDA' and Fopts['force_cuda']: + try: Platform.getPlatformByName('CUDA') + except: raise RuntimeError('Forcing failure because CUDA platform unavailable') + if threads > 1: + logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") + elif engname == "gromacs": + # Gromacs-specific options + EngOpts["gmxpath"] = Fopts["gmxpath"] + EngOpts["gmxsuffix"] = Fopts["gmxsuffix"] + EngOpts["gmx_top"] = Fopts["gmx_top"] + EngOpts["gmx_mdp"] = Fopts["gmx_mdp"] + if Fopts['force_cuda']: logger.warn("force_cuda option has no effect on Gromacs engine.") + if Fopts['rpmd_beads'] > 0: raise RuntimeError("Gromacs cannot handle RPMD.") + if Fopts['mts']: logger.warn("Gromacs not configured for multiple timestep integrator.") + if Fopts['anisotropic']: logger.warn("Gromacs not configured for anisotropic box scaling.") + elif engname == "tinker": + 
EngOpts["tinkerpath"] = Fopts["tinkerpath"] + EngOpts["tinker_key"] = Fopts["tinker_key"] - printcool_dictionary(args) + # if Fopts['threads'] > 1: + printcool_dictionary(EngOpts, title="Engine options") # Number of threads, multiple timestep integrator, anisotropic box etc. - threads = fopts.get('md_threads', 1) - mts = fopts.get('mts_integrator', 0) - rpmd_beads = fopts.get('rpmd_beads', 0) - force_cuda = fopts.get('force_cuda', 0) - nbarostat = fopts.get('n_mcbarostat', 25) - anisotropic = fopts.get('anisotropic_box', 0) - minimize = fopts.get('minimize_energy', 1) - + # threads = Fopts.get('md_threads', 1) + # mts = Fopts.get('mts_integrator', 0) + # rpmd_beads = Fopts.get('rpmd_beads', 0) + # force_cuda = Fopts.get('force_cuda', 0) + # nbarostat = Fopts.get('n_mcbarostat', 25) + # anisotropic = Fopts.get('anisotropic_box', 0) + # minimize = Fopts.get('minimize_energy', 1) + sys.exit() + #---- # Setting up MD simulations #---- - - EngOpts = OrderedDict() - EngOpts = OrderedDict([("coords", fopts['coords']), ("pbc", False)]) + EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) GenOpts = OrderedDict([('FF', FF)]) @@ -144,8 +197,8 @@ def main(): if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") elif engname == "gromacs": # Gromacs-specific options - GenOpts["gmxpath"] = fopts["gmxpath"] - GenOpts["gmxsuffix"] = fopts["gmxsuffix"] + GenOpts["gmxpath"] = Fopts["gmxpath"] + GenOpts["gmxsuffix"] = Fopts["gmxsuffix"] EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" @@ -156,7 +209,7 @@ def main(): if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") elif engname == "tinker": # Tinker-specific options - GenOpts["tinkerpath"] = fopts["tinkerpath"] + GenOpts["tinkerpath"] = Fopts["tinkerpath"] 
EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" EngOpts["gas"]["tinker_key"] = os.path.splitext(gas_fnm)[0] + ".key" if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") @@ -174,7 +227,7 @@ def main(): ("temperature", temperature), ("pressure", pressure), ("nequil", liquid_nequil), ("minimize", minimize), ("nsave", int(1000 * liquid_intvl / liquid_timestep)), - ("verbose", True), ('save_traj', fopts['save_traj']), + ("verbose", True), ('save_traj', Fopts['save_traj']), ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), diff --git a/src/gmxio.py b/src/gmxio.py index bcd0525a8..b43dc205c 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1497,6 +1497,8 @@ def __init__(self,options,tgt_opts,forcefield): self.engname = "gromacs" # Valid coordinate suffix. self.crdsfx = ['.gro', '.pdb'] + # Auxiliary (e.g. topology) files. + self.auxsfx = [['.mdp'], ['.top']] # Command prefix. self.mdpfx = "bash gmxprefix.bash" # Scripts to be copied from the ForceBalance installation directory. diff --git a/src/simulation.py b/src/simulation.py new file mode 100644 index 000000000..7bd52ae91 --- /dev/null +++ b/src/simulation.py @@ -0,0 +1,76 @@ +import os +from forcebalance.molecule import Molecule +from collections import OrderedDict + +class Simulation(object): + + """ + Data container for a MD simulation (specified by index, simulation + type, initial condition). These settings are written to a file + then passed to md_one.py. + + The Simulation object is passed between the master ForceBalance + process and the remote script (e.g. md_one.py). 
+ """ + + type_settings = {'gas': {'pbc' : 0}, + 'liquid': {'pbc' : 1}, + 'solid': {'pbc' : 1, 'anisotropic_box' : 1}, + 'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}} + + def __init__(self, target, name, index, stype, initial, iframe, tsnames): + print target.root, target.tgtdir + raw_input() + # The simulation name will identify the simulation within a collection + # belonging to the Index. + self.name = name + # The Index that the simulation belongs to. + self.index = index + # The type of simulation (liquid, gas, solid, bilayer...) + if stype not in Simulation.type_settings.keys(): + logger.error('Simulation type %s is not supported at this time') + raise RuntimeError + self.type = stype + # The file containing initial coordinates. + self.initial = initial + # The frame number in the initial coordinate file. + self.iframe = iframe + # The time series for the simulation. + self.timeseries = OrderedDict([(i, []) for i in tsnames]) + # The file extension that the coordinate file will be written with. + self.fext = os.path.splitext(initial)[1] + # The file name of the coordinate file. + self.coords = "%s%s" % (self.type, self.fext) + # The number of threads for this simulation. + self.threads = target.OptionDict.get('md_threads', 1) + # Whether to use multiple timestep integrator. + self.mts = target.OptionDict.get('mts_integrator', 0) + # The number of beads in an RPMD simulation. + self.rpmd_beads = target.OptionDict.get('rpmd_beads', 0) + # Whether to use the CUDA platform (OpenMM only). + self.force_cuda = target.OptionDict.get('force_cuda', 0) + # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only). + self.nbarostat = target.OptionDict.get('n_mcbarostat', 25) + # Flag for anisotropic simulation cell. + self.anisotropic = target.OptionDict.get('anisotropic_box', 0) + # Flag for minimizing the energy. + self.minimize = target.OptionDict.get('minimize_energy', 0) + # Finite difference step size. 
+ self.h = target.h + # Name of the simulation engine. + self.engname = target.engname + # Whether to use periodic boundary conditions. + self.pbc = Simulation.type_settings[self.type]['pbc'] + # Gromacs-specific options. + if self.engname == 'gromacs': + self.gmxpath = target.gmxpath + self.gmxsuffix = target.gmxsuffix + elif self.engname == 'tinker': + self.tinkerpath = target.tinkerpath + + def __str__(self): + msg = [] + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) diff --git a/src/thermo.py b/src/thermo.py index 8736bf1e6..19f66a43c 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -9,6 +9,7 @@ import cStringIO from forcebalance.molecule import Molecule +from forcebalance.simulation import Simulation from forcebalance.observable import OMap from forcebalance.target import Target from forcebalance.finite_difference import in_fd @@ -350,6 +351,81 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs +def find_file(tgtdir, index, stype, sufs, icn): + """ + Search for a suitable file that matches the simulation index, + type, suffix and IC number. This can be used to search for + initial coordinates, but also auxiliary files for the + simulation (e.g. .top and .mdp files for a Gromacs simulation, + or .key files for a Tinker simulation.) + + Generally, it is preferred to provide files where the base + name matches the simulation type. However, since it is also + okay to put all files for a simulation type into a + subdirectory, generic file names like 'topol' and 'conf' may + be used. 
+ + Initial condition files will be searched for in the following priority (suf stands for suffix) + targets/target_name/index/stype/ICs/stype_#.suf + targets/target_name/index/stype/ICs/stype#.suf + targets/target_name/index/stype/ICs/#.suf + targets/target_name/index/stype/ICs/stype.suf + targets/target_name/index/stype/ICs/coords.suf + targets/target_name/index/stype/ICs/conf.suf + targets/target_name/index/stype/ICs/topol.suf + targets/target_name/index/stype/ICs/grompp.suf + targets/target_name/index/stype/ICs/input.suf + targets/target_name/index/stype/ICs/tinker.suf + targets/target_name/index/stype/stype.suf + targets/target_name/index/stype/coords.suf + targets/target_name/index/stype.suf + targets/target_name/stype.suf + + @param[in] index Name of the index directory to look in + @param[in] stype Name of the simulation type to look for + @param[in] sufs List of suffixes to look for in order of priority + @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) + """ + found = '' + # The 2-tuple here corresponds to: + # - Search path for the file + # - Whether the file that we're looking for is 'numbered' + # (i.e. 
a different file for each structure); otherwise the + # single file may contain multiple structures + pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] + + basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx), False), + (os.path.join(index, stype, pfx), False), + (os.path.join(index, pfx), False), + (os.path.join(pfx), False)] for pfx in pfxs])) + + paths = OrderedDict() + for fnm, numbered in basefnms: + for suf in sufs: + fpath = os.path.join(tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) + paths[fpath] = os.path.exists(fpath) + if os.path.exists(fpath): + if found != '': + logger.info('Target %s Index %s Simulation %s : ' + '%s overrides %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) + else: + if not numbered: + M = Molecule(fpath) + if len(M) <= icn: + logger.error("Target %s Index %s Simulation %s : " + "file %s doesn't have enough structures\n" % + (os.path.basename(tgtdir), index, stype, fpath)) + raise RuntimeError + logger.info('Target %s Index %s Simulation %s : ' + 'found file %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) + found = fpath + if found == '': + logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) + raise RuntimeError + return found, 0 if numbered else icn + class Thermo(Target): """ A target for fitting general experimental data sets. The source @@ -577,81 +653,6 @@ def floatcol(col): return - def find_file(self, index, stype, sufs, icn): - """ - Search for a suitable file that matches the simulation index, - type, suffix and IC number. This can be used to search for - initial coordinates, but also auxiliary files for the - simulation (e.g. .top and .mdp files for a Gromacs simulation, - or .key files for a Tinker simulation.) 
- - Generally, it is preferred to provide files where the base - name matches the simulation type. However, since it is also - okay to put all files for a simulation type into a - subdirectory, generic file names like 'topol' and 'conf' may - be used. - - Initial condition files will be searched for in the following priority (suf stands for suffix) - targets/target_name/index/stype/ICs/stype_#.suf - targets/target_name/index/stype/ICs/stype#.suf - targets/target_name/index/stype/ICs/#.suf - targets/target_name/index/stype/ICs/stype.suf - targets/target_name/index/stype/ICs/coords.suf - targets/target_name/index/stype/ICs/conf.suf - targets/target_name/index/stype/ICs/topol.suf - targets/target_name/index/stype/ICs/grompp.suf - targets/target_name/index/stype/ICs/input.suf - targets/target_name/index/stype/ICs/tinker.suf - targets/target_name/index/stype/stype.suf - targets/target_name/index/stype/coords.suf - targets/target_name/index/stype.suf - targets/target_name/stype.suf - - @param[in] index Name of the index directory to look in - @param[in] stype Name of the simulation type to look for - @param[in] sufs List of suffixes to look for in order of priority - @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) - """ - found = '' - # The 2-tuple here corresponds to: - # - Search path for the file - # - Whether the file that we're looking for is 'numbered' - # (i.e. 
a different file for each structure); otherwise the - # single file may contain multiple structures - pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] - - basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', pfx), False), - (os.path.join(index, stype, pfx), False), - (os.path.join(index, pfx), False), - (os.path.join(pfx), False)] for pfx in pfxs])) - - paths = OrderedDict() - for fnm, numbered in basefnms: - for suf in sufs: - fpath = os.path.join(self.tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) - paths[fpath] = os.path.exists(fpath) - if os.path.exists(fpath): - if found != '': - logger.info('Target %s Index %s Simulation %s : ' - '%s overrides %s\n' % (self.name, index, stype, fpath)) - else: - if not numbered: - M = Molecule(fpath) - if len(M) <= icn: - logger.error("Target %s Index %s Simulation %s : " - "file %s doesn't have enough structures\n" % - (self.name, index, stype, fpath)) - raise RuntimeError - logger.info('Target %s Index %s Simulation %s : ' - 'found file %s\n' % (self.name, index, stype, fpath)) - found = fpath - if found == '': - logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) - raise RuntimeError - return found, 0 if numbered else icn - def initialize_observables(self): """ Determine Observable objects to be created. 
Checks to see @@ -728,10 +729,9 @@ def initialize_simulations(self): else: n_ic = 1 for icn in range(n_ic): - icfnm, icframe = self.find_file(index, stype, self.crdsfx, icn) + icfnm, icframe = find_file(self.tgtdir, index, stype, self.crdsfx, icn) sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype - self.Simulations[index].append(Simulation(sname, index, stype, icfnm, icframe, sorted(list(tsset)))) - + self.Simulations[index].append(Simulation(self, sname, index, stype, icfnm, icframe, sorted(list(tsset)))) return def submit_jobs(self, mvals, AGrad=True, AHess=True): @@ -769,7 +769,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): # Write to disk: Force field object, current parameter values, target options M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] - M.write("%s%s" % (Sim.type, self.crdsfx[0])) + M.write(Sim.coords) # # Get relevant files from the target folder, I suppose. # link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) # # Determine initial coordinates. @@ -777,22 +777,33 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) # Command for running the simulation. ## Copy run scripts from ForceBalance installation directory - # We can build the entire MD options dictionary here!! # Update dictionary with simulation options. 
- OptionDict = copy.deepcopy(self.OptionDict) - OptionDict['gradient'] = AGrad - OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) - OptionDict['simtype'] = Sim.type + # OptionDict = copy.deepcopy(self.OptionDict) + # OptionDict['gradient'] = AGrad + # Sim.gradient = AGrad + # Sim.nequil = self.nequil + # Sim.nsteps = self.nsteps + # Sim.timestep = self.timestep + # Sim.sample = self.sample + # Sim.h = + # Sim.pgrad = + # OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) + # OptionDict.update(vars(Sim)) + # OptionDict['simtype'] = Sim.type # # In the future we should have these settings # OptionDict['nequil'] = self.nequil # OptionDict['nsteps'] = self.nsteps # OptionDict['timestep'] = self.timestep # OptionDict['sample'] = self.sample # OptionDict['minimize'] = self.minimize - printcool_dictionary(OptionDict) + # printcool_dictionary(vars(Sim)) + # SimOpts = dict(vars(Sim)) + Opts = vars(Sim) + Opts['gradient'] = AGrad + Opts['pgrad'] = self.pgrad - with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,OptionDict),f) + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Opts),f) for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(os.getcwd(), f)) @@ -812,7 +823,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # if wq == None: # logger.info("Running condensed phase simulation locally.\n") # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') + _exec(cmdstr, copy_stderr=False, outfnm='md_one.out') # else: # queue_up(wq, command = cmdstr+' &> npt.out', # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], @@ -1095,31 +1106,3 @@ def __str__(self): return "\n".join(msg) -class Simulation(object): - - """ - Data container for a simulation (specified by index, simulation - type, initial condition). 
- """ - - def __init__(self, name, index, stype, initial, iframe, tsnames): - # The simulation name will identify the simulation within a collection - # belonging to the Index. - self.name = name - # The Index that the simulation belongs to. - self.index = index - # The type of simulation (liquid, gas, solid, bilayer...) - self.type = stype - # The file containing initial coordinates. - self.initial = initial - # The frame number in the initial coordinate file. - self.iframe = iframe - # The time series for the simulation. - self.timeseries = OrderedDict([(i, []) for i in tsnames]) - - def __str__(self): - msg = [] - msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) - msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) - msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) - return "\n".join(msg) From 0cb3e61c8b7b66abe872f0387bbf0b2a5ddabc69 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 22 Apr 2014 22:24:44 -0700 Subject: [PATCH 19/25] md_one.py creates Engine object. --- src/data/md_one.py | 92 +++++--- src/simulation.py | 76 ------- src/thermo.py | 202 ++++++++++++++---- .../targets/LiquidBromine/1/gas.mdp | 14 +- 4 files changed, 226 insertions(+), 158 deletions(-) delete mode 100644 src/simulation.py diff --git a/src/data/md_one.py b/src/data/md_one.py index 9da428e84..ff1763179 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -36,9 +36,6 @@ # Note: Only the simulation settings that vary across different # simulations in a target may be specified on the command line. - -# ANYTHING THREE LETTERS OR BELOW IS A SHORT OPTION WITH ONE DASH! 
- parser = argparse.ArgumentParser() parser.add_argument('simulation', type=str, help='The simulation name (important; used in setting up)') @@ -66,7 +63,41 @@ parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', help='Specify the time series which are written to disk') -Copts = vars(parser.parse_args()) +# Parse the command line options and save as a dictionary (don't save NoneTypes) +parsed = parser.parse_args() +args = OrderedDict([(i, j) for i, j in vars(parsed).items() if j != None]) + +#---- +# Load the ForceBalance pickle file which contains: +#---- +# - Force field object +# - Optimization parameters +# - Options loaded from file +FF, mvals, Sim = lp_load(open('forcebalance.p')) +FF.ffdir = '.' + +# Engine name. +engname = Sim.engname + +# Import modules and create the correct Engine object. +if engname == "openmm": + try: + from simtk.unit import * + from simtk.openmm import * + from simtk.openmm.app import * + except: + traceback.print_exc() + raise Exception("Cannot import OpenMM modules") + from forcebalance.openmmio import * + Engine = OpenMM +elif engname == "gromacs" or engname == "gmx": + from forcebalance.gmxio import * + Engine = GMX +elif engname == "tinker": + from forcebalance.tinkerio import * + Engine = TINKER +else: + raise Exception('OpenMM, GROMACS, and TINKER are supported at this time.') def main(): @@ -88,39 +119,44 @@ def main(): """ - # printcool("ForceBalance simulation using engine: %s" % engname.upper(), - # color=4, bold=True) + # Write the force field file. + FF.make(mvals) + + # Read the command line options (they may override the options from file.) + AGrad = args['gradient'] or Sim.gradient + for i in ['temperature', 'pressure', 'nequil', 'nsteps', 'timestep', 'sample', 'threads', 'minimize']: + if i in args: + Sim.MDOpts[i] = args[i] #---- - # Load the ForceBalance pickle file which contains: + # Print some options. + # At this point, engine and MD options should be SET! 
#---- - # - Force field object - # - Optimization parameters - # - Options loaded from file - FF, mvals, Fopts = lp_load(open('forcebalance.p')) - FF.ffdir = '.' - # Write the force field file. - FF.make(mvals) + printcool("ForceBalance simulation using engine: %s" % engname.upper(), + color=4, bold=True) + printcool_dictionary(args, title="Options from command line") + printcool_dictionary(Sim.EngOpts, title="Engine options") + printcool_dictionary(Sim.MDOpts, title="Molecular dynamics options") + + #---- + # For convenience, assign some local variables. + #---- + # Finite difference step size + h = Sim.h + # Active parameters to differentiate + pgrad = Sim.pgrad - printcool_dictionary(Copts, title="Options from command line") - printcool_dictionary(Fopts, title="Options from file") + # Create instances of the MD Engine objects. + MDEngine = Engine(name=Sim.type, **Sim.EngOpts) - # Read the command line options (they can override the options from file.) - # Calculate energy / dipole derivatives. - AGrad = Copts['gradient'] or Fopts['gradient'] - # Whether to minimize the energy. - minimize = Copts['minimize'] or Fopts['minimize'] - # Engine name. - engname = Fopts['engname'] - # - threads = Copts.get('threads', Fopts.get('threads', 1)) + sys.exit() # # Get the temperature. - # temperature = Copts.get('temperature', Fopts.get('temperature', None)) + # temperature = args.get('temperature', Fopts.get('temperature', None)) # # Get the pressure. 
- # pressure = Copts.get('pressure', Fopts.get('pressure', None)) + # pressure = args.get('pressure', Fopts.get('pressure', None)) # # - # nequil = Copts.get('nequil', Fopts.get('nequil')) + # nequil = args.get('nequil', Fopts.get('nequil')) #---- # load some options from file diff --git a/src/simulation.py b/src/simulation.py deleted file mode 100644 index 7bd52ae91..000000000 --- a/src/simulation.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -from forcebalance.molecule import Molecule -from collections import OrderedDict - -class Simulation(object): - - """ - Data container for a MD simulation (specified by index, simulation - type, initial condition). These settings are written to a file - then passed to md_one.py. - - The Simulation object is passed between the master ForceBalance - process and the remote script (e.g. md_one.py). - """ - - type_settings = {'gas': {'pbc' : 0}, - 'liquid': {'pbc' : 1}, - 'solid': {'pbc' : 1, 'anisotropic_box' : 1}, - 'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}} - - def __init__(self, target, name, index, stype, initial, iframe, tsnames): - print target.root, target.tgtdir - raw_input() - # The simulation name will identify the simulation within a collection - # belonging to the Index. - self.name = name - # The Index that the simulation belongs to. - self.index = index - # The type of simulation (liquid, gas, solid, bilayer...) - if stype not in Simulation.type_settings.keys(): - logger.error('Simulation type %s is not supported at this time') - raise RuntimeError - self.type = stype - # The file containing initial coordinates. - self.initial = initial - # The frame number in the initial coordinate file. - self.iframe = iframe - # The time series for the simulation. - self.timeseries = OrderedDict([(i, []) for i in tsnames]) - # The file extension that the coordinate file will be written with. - self.fext = os.path.splitext(initial)[1] - # The file name of the coordinate file. 
- self.coords = "%s%s" % (self.type, self.fext) - # The number of threads for this simulation. - self.threads = target.OptionDict.get('md_threads', 1) - # Whether to use multiple timestep integrator. - self.mts = target.OptionDict.get('mts_integrator', 0) - # The number of beads in an RPMD simulation. - self.rpmd_beads = target.OptionDict.get('rpmd_beads', 0) - # Whether to use the CUDA platform (OpenMM only). - self.force_cuda = target.OptionDict.get('force_cuda', 0) - # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only). - self.nbarostat = target.OptionDict.get('n_mcbarostat', 25) - # Flag for anisotropic simulation cell. - self.anisotropic = target.OptionDict.get('anisotropic_box', 0) - # Flag for minimizing the energy. - self.minimize = target.OptionDict.get('minimize_energy', 0) - # Finite difference step size. - self.h = target.h - # Name of the simulation engine. - self.engname = target.engname - # Whether to use periodic boundary conditions. - self.pbc = Simulation.type_settings[self.type]['pbc'] - # Gromacs-specific options. 
- if self.engname == 'gromacs': - self.gmxpath = target.gmxpath - self.gmxsuffix = target.gmxsuffix - elif self.engname == 'tinker': - self.tinkerpath = target.tinkerpath - - def __str__(self): - msg = [] - msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) - msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) - msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) - return "\n".join(msg) diff --git a/src/thermo.py b/src/thermo.py index 19f66a43c..56cdf7dd5 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -3,13 +3,13 @@ import csv import copy import errno +import shutil import numpy as np import pandas as pd import itertools import cStringIO from forcebalance.molecule import Molecule -from forcebalance.simulation import Simulation from forcebalance.observable import OMap from forcebalance.target import Target from forcebalance.finite_difference import in_fd @@ -27,6 +27,14 @@ # print logger.parent.parent.handlers[0] # logger.parent.parent.handlers = [] +def getval(dframe, col): + """ Extract the single non-NaN value from a column. """ + nnan = [i for i in dframe[col] if not isnpnan(i)] + if len(nnan) != 1: + logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) + raise RuntimeError + return nnan[0] + class TextParser(object): """ Parse a text file. """ def __init__(self, fnm): @@ -351,7 +359,7 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs -def find_file(tgtdir, index, stype, sufs, icn): +def find_file(tgtdir, index, stype, sufs, iscrd, icn=0): """ Search for a suitable file that matches the simulation index, type, suffix and IC number. 
This can be used to search for @@ -381,9 +389,11 @@ def find_file(tgtdir, index, stype, sufs, icn): targets/target_name/index/stype.suf targets/target_name/stype.suf - @param[in] index Name of the index directory to look in + @param[in] tgtdir Name of the target directory to look in + @param[in] index Name of the index directory to look in (within tgtdir) @param[in] stype Name of the simulation type to look for @param[in] sufs List of suffixes to look for in order of priority + @param[in] iscrd Whether the file is a coordinate file (false for auxiliary files like .mdp). @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) """ found = '' @@ -411,7 +421,7 @@ def find_file(tgtdir, index, stype, sufs, icn): logger.info('Target %s Index %s Simulation %s : ' '%s overrides %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) else: - if not numbered: + if iscrd and not numbered: M = Molecule(fpath) if len(M) <= icn: logger.error("Target %s Index %s Simulation %s : " @@ -420,7 +430,7 @@ def find_file(tgtdir, index, stype, sufs, icn): raise RuntimeError logger.info('Target %s Index %s Simulation %s : ' 'found file %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) - found = fpath + found = os.path.abspath(fpath) if found == '': logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) raise RuntimeError @@ -445,13 +455,15 @@ def __init__(self, options, tgt_opts, forcefield): # Length of simulation chain self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run - self.set_option(tgt_opts, "eq_steps", "nequil", forceprint=True) + self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run - self.set_option(tgt_opts, "md_steps", "nsteps", forceprint=True) + self.set_option(tgt_opts, "md_steps", forceprint=True) # Time 
step (in femtoseconds) self.set_option(tgt_opts, "timestep", forceprint=True) # Sampling interval (in picoseconds) - self.set_option(tgt_opts, "interval", "sample", forceprint=True) + self.set_option(tgt_opts, "interval", forceprint=True) + # Save trajectories? + self.set_option(tgt_opts, "save_traj", forceprint=True) ## Variables # Prefix names for simulation data @@ -729,9 +741,8 @@ def initialize_simulations(self): else: n_ic = 1 for icn in range(n_ic): - icfnm, icframe = find_file(self.tgtdir, index, stype, self.crdsfx, icn) sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype - self.Simulations[index].append(Simulation(self, sname, index, stype, icfnm, icframe, sorted(list(tsset)))) + self.Simulations[index].append(Simulation(self, self.Data.ix[index], sname, index, stype, icn, sorted(list(tsset)))) return def submit_jobs(self, mvals, AGrad=True, AHess=True): @@ -763,52 +774,25 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): temp = self.Data2['temp'].ix[index] if 'temp' in self.Data2 else None pres = self.Data2['pres'].ix[index] if 'pres' in self.Data2 else None for Sim in self.Simulations[index]: + Sim.gradient = AGrad simd = os.path.join(os.getcwd(), index, Sim.name) GoInto(simd) # Submit or run the simulation if the result file does not exist. if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): - # Write to disk: Force field object, current parameter values, target options + # Write coordinate file in the current location. M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] - M.write(Sim.coords) - # # Get relevant files from the target folder, I suppose. - # link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) - # # Determine initial coordinates. - # self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] - # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) - # Command for running the simulation. 
- ## Copy run scripts from ForceBalance installation directory - # We can build the entire MD options dictionary here!! - # Update dictionary with simulation options. - # OptionDict = copy.deepcopy(self.OptionDict) - # OptionDict['gradient'] = AGrad - # Sim.gradient = AGrad - # Sim.nequil = self.nequil - # Sim.nsteps = self.nsteps - # Sim.timestep = self.timestep - # Sim.sample = self.sample - # Sim.h = - # Sim.pgrad = - # OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) - # OptionDict.update(vars(Sim)) - # OptionDict['simtype'] = Sim.type - # # In the future we should have these settings - # OptionDict['nequil'] = self.nequil - # OptionDict['nsteps'] = self.nsteps - # OptionDict['timestep'] = self.timestep - # OptionDict['sample'] = self.sample - # OptionDict['minimize'] = self.minimize - # printcool_dictionary(vars(Sim)) - # SimOpts = dict(vars(Sim)) - Opts = vars(Sim) - Opts['gradient'] = AGrad - Opts['pgrad'] = self.pgrad - - with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Opts),f) + M.write(Sim.EngOpts['coords']) + # Copy auxiliary files to the current location. + for i, j in Sim.faux.values(): + shutil.copy2(i, j) + # Write to disk: Force field object, current parameter values, target options + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Sim),f) + # Copy scripts to the current location. for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(os.getcwd(), f)) + # Put together the command. cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.type)] - #cmdlist.append('-eq %i -md %i -dt %g -sp %g' % (self.nequil, self.nsteps, self.timestep, self.sample)) if temp != None: cmdlist.append('-T %g' % float(temp)) if pres != None: @@ -1106,3 +1090,127 @@ def __str__(self): return "\n".join(msg) +class Simulation(object): + + """ + Data container for a MD simulation (specified by index, simulation + type, initial condition). These settings are written to a file + then passed to md_one.py. 
+
+    The Simulation object is passed between the master ForceBalance
+    process and the remote script (e.g. md_one.py).
+    """
+
+    type_settings = {'gas': {'pbc' : 0},
+                     'liquid': {'pbc' : 1},
+                     'solid': {'pbc' : 1, 'anisotropic_box' : 1},
+                     'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}}
+
+    def __init__(self, tgt, data, name, index, stype, icn, tsnames):
+
+        # The name of the simulation (refers to a directory under job.tmp/target/iter_x/index/name)
+        self.name = name
+        # The Index that the simulation belongs to.
+        self.index = index
+        # The type of simulation (liquid, gas, solid, bilayer...)
+        if stype not in Simulation.type_settings.keys():
+            logger.error('Simulation type %s is not supported at this time' % stype)
+            raise RuntimeError
+        # The reference data! May contain parameters for calculating observables.
+        self.Data = copy.deepcopy(data)
+        # Type of the simulation (map to simulation settings)
+        self.type = stype
+        # Locate the initial coordinate file and frame number.
+        self.initial, self.iframe = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, tgt.crdsfx, True, icn)
+        # The time series for the simulation.
+        self.timeseries = OrderedDict([(i, []) for i in tsnames])
+        # The file extension that the coordinate file will be written with.
+        self.fext = os.path.splitext(self.initial)[1]
+        # Auxiliary files to be copied to the current location prior to running the simulation.
+        self.faux = OrderedDict()
+        for sfx in tgt.auxsfx:
+            auxf = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, sfx, False)[0]
+            self.faux[os.path.splitext(auxf)[1]] = (auxf, "%s%s" % (self.type, os.path.splitext(auxf)[1]))
+        # Name of the simulation engine
+        self.engname = tgt.engname
+        # Whether to use the CUDA platform (OpenMM only).
+        self.force_cuda = tgt.OptionDict.get('force_cuda', 0)
+        # Finite difference step size.
+        self.h = tgt.h
+        # Active parameters to differentiate over.
+        self.pgrad = tgt.pgrad
+
+        pbc = Simulation.type_settings[self.type]['pbc']
+
+        #----
+        # MD options, passed straight to the molecular_dynamics() method
+        #----
+        self.MDOpts = OrderedDict()
+        # The time step in femtoseconds.
+        self.MDOpts['timestep'] = tgt.timestep
+        # The number of equilibration MD steps.
+        self.MDOpts['nequil'] = tgt.eq_steps
+        # The number of production MD steps.
+        self.MDOpts['nsteps'] = tgt.md_steps
+        # The number of MD steps between sampling.
+        self.MDOpts['nsave'] = int(1000 * tgt.interval / self.MDOpts['timestep'])
+        # Flag for minimizing the energy.
+        self.MDOpts['minimize'] = tgt.OptionDict.get('minimize_energy', 0)
+        # The number of threads for this simulation (no-PBC simulations are 1 thread).
+        self.MDOpts['threads'] = tgt.OptionDict.get('md_threads', 1) if pbc else 1
+        # Whether to use multiple timestep integrator.
+        self.MDOpts['mts'] = tgt.OptionDict.get('mts_integrator', 0)
+        # The number of beads in an RPMD simulation.
+        self.MDOpts['rpmd_beads'] = tgt.OptionDict.get('rpmd_beads', 0)
+        # Print out lots of information.
+        self.MDOpts['verbose'] = True
+        # Save trajectory to disk.
+        self.MDOpts['save_traj'] = tgt.save_traj
+        # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only).
+        self.MDOpts['nbarostat'] = tgt.OptionDict.get('n_mcbarostat', 25)
+        # Flag for anisotropic simulation cell (OpenMM only).
+        self.MDOpts['anisotropic'] = tgt.OptionDict.get('anisotropic_box', 0)
+        # The time step for the 'fast forces' in femtoseconds in MTS integrators.
+        self.MDOpts['faststep'] = tgt.OptionDict.get('faststep', 0.25)
+        # Simulation temperature in Kelvin.
+        self.MDOpts['temperature'] = getval(self.Data, 'temp') if 'temp' in self.Data else None
+        # Simulation pressure in bar.
+        self.MDOpts['pressure'] = getval(self.Data, 'pres') if 'pres' in self.Data else None
+
+        #----
+        # Engine options, used in creating the Engine object
+        #----
+        self.EngOpts = OrderedDict()
+        # Whether to use periodic boundary conditions.
+ self.EngOpts['pbc'] = pbc + # The name of the coordinate file to be written prior to running the simulation. + self.EngOpts['coords'] = "%s%s" % (self.type, self.fext) + # Software-specific options. + if self.engname == 'openmm': + self.EngOpts['platname'] = 'CUDA' if self.EngOpts['pbc'] else 'Reference' + else: + if self.force_cuda: + logger.error("force_cuda option is set, but has no effect on Gromacs engine.") ; raise RuntimeError + if self.MDOpts['rpmd_beads'] > 0: + logger.error('Only the OpenMM engine can handle RPMD simulations.') ; raise RuntimeError + if self.MDOpts['mts']: + logger.error('Only OpenMM is configured to use multiple timestep integrator.') ; raise RuntimeError + if self.MDOpts['anisotropic']: + logger.error('Only OpenMM is configured to use anisotropic pressure coupling.') ; raise RuntimeError + + if self.engname == 'gromacs': + self.EngOpts['gmxpath'] = tgt.gmxpath + self.EngOpts['gmxsuffix'] = tgt.gmxsuffix + self.EngOpts['gmx_top'] = self.faux['.top'][1] + self.EngOpts['gmx_mdp'] = self.faux['.mdp'][1] + + if self.engname == 'tinker': + self.EngOpts['tinkerpath'] = tgt.tinkerpath + self.EngOpts['tinker_key'] = self.faux['.key'][1] + + def __str__(self): + msg = [] + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) diff --git a/studies/004_thermo/targets/LiquidBromine/1/gas.mdp b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp index 1a64558b6..2a7427065 100644 --- a/studies/004_thermo/targets/LiquidBromine/1/gas.mdp +++ b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp @@ -11,16 +11,16 @@ nstxtcout = 50 xtc_grps = System energygrps = System -nstlist = 10 -ns_type = grid -rlist = 0.9 +nstlist = 0 +ns_type = simple +rlist = 0.0 vdwtype = cut-off coulombtype = cut-off -rcoulomb = 0.9 -rvdw = 0.9 -rvdw_switch = 0.9 
+rcoulomb = 0.0 +rvdw = 0.0 +rvdw_switch = 0.0 constraints = all-bonds -pbc = xyz +pbc = no tcoupl = v-rescale tc_grps = System From c256ce09d8270b87c55b566150a70a53e959ed1e Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 22 Apr 2014 22:38:05 -0700 Subject: [PATCH 20/25] md_one.py runs molecular dynamics! --- src/data/md_one.py | 14 ++++++++++---- src/gmxio.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index ff1763179..46b8b0bcf 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -21,6 +21,7 @@ import numpy as np import importlib as il +from forcebalance.nifty import click from forcebalance.nifty import lp_dump, lp_load, wopen from forcebalance.nifty import printcool, printcool_dictionary from forcebalance.molecule import Molecule @@ -89,13 +90,13 @@ traceback.print_exc() raise Exception("Cannot import OpenMM modules") from forcebalance.openmmio import * - Engine = OpenMM + EngineClass = OpenMM elif engname == "gromacs" or engname == "gmx": from forcebalance.gmxio import * - Engine = GMX + EngineClass = GMX elif engname == "tinker": from forcebalance.tinkerio import * - Engine = TINKER + EngineClass = TINKER else: raise Exception('OpenMM, GROMACS, and TINKER are supported at this time.') @@ -147,7 +148,12 @@ def main(): pgrad = Sim.pgrad # Create instances of the MD Engine objects. - MDEngine = Engine(name=Sim.type, **Sim.EngOpts) + Engine = EngineClass(name=Sim.type, **Sim.EngOpts) + + click() # Start timer. + # This line runs the condensed phase simulation. 
+ prop_return = Engine.molecular_dynamics(**Sim.MDOpts) + logger.info("MD simulation took %.3f seconds\n" % click()) sys.exit() diff --git a/src/gmxio.py b/src/gmxio.py index b43dc205c..12d9159b3 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1120,13 +1120,18 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, if temperature != None: md_opts["ref_t"] = temperature md_opts["gen_vel"] = "no" + # Set some default methods for temperature coupling. md_defs["tc_grps"] = "System" md_defs["tcoupl"] = "v-rescale" md_defs["tau_t"] = 1.0 if self.pbc: md_opts["comm_mode"] = "linear" + # Removing center of mass motion at every time step should not impact performance. + # http://gromacs.5086.x6.nabble.com/COM-motion-removal-td4413458.html + md_opts["nstcomm"] = 1 if pressure != None: md_opts["ref_p"] = pressure + # Set some default methods for pressure coupling. md_defs["pcoupl"] = "parrinello-rahman" md_defs["tau_p"] = 1.5 else: @@ -1172,6 +1177,16 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, self.warngmx("grompp -c %s -p %s.top -f %s-md.mdp -o %s-md.tpr" % (gro2, self.name, self.name, self.name), warnings=warnings, print_command=verbose) self.callgmx("mdrun -v -deffnm %s-md -nt %i -stepout %i" % (self.name, threads, nsave), print_command=verbose, print_to_screen=verbose) + if verbose: logger.info("Finished!\n") + + # Final frame of molecular dynamics. 
+ self.md_final = "%s-md.gro" % self.name + + if 1: return + + #---- + # Below + #---- self.mdtraj = '%s-md.trr' % self.name if verbose: logger.info("Production run finished, calculating properties...\n") From a8ff3bb5b5826f8ecf1aa0de994683a6e2fa6413 Mon Sep 17 00:00:00 2001 From: leeping Date: Wed, 23 Apr 2014 14:55:30 -0700 Subject: [PATCH 21/25] Work on extracting timeseries --- src/data/md_one.py | 15 +++- src/gmxio.py | 201 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 170 insertions(+), 46 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index 46b8b0bcf..70f6256da 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -152,8 +152,19 @@ def main(): click() # Start timer. # This line runs the condensed phase simulation. - prop_return = Engine.molecular_dynamics(**Sim.MDOpts) + Engine.molecular_dynamics(**Sim.MDOpts) logger.info("MD simulation took %.3f seconds\n" % click()) + + # Extract properties. + Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) + + # Dump results to file + logger.info("Writing final force field.\n") + pvals = FF.make(mvals) + + logger.info("Writing all simulation data to disk.\n") + with wopen('md_result.p') as f: + lp_dump(Results, f) sys.exit() @@ -212,6 +223,8 @@ def main(): # if Fopts['threads'] > 1: printcool_dictionary(EngOpts, title="Engine options") + + # Number of threads, multiple timestep integrator, anisotropic box etc. # threads = Fopts.get('md_threads', 1) # mts = Fopts.get('mts_integrator', 0) diff --git a/src/gmxio.py b/src/gmxio.py index 12d9159b3..181ad4557 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -8,6 +8,7 @@ import os, sys import re +import pandas as pd from forcebalance.nifty import * from forcebalance.nifty import _exec from forcebalance import BaseReader @@ -1182,28 +1183,25 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, # Final frame of molecular dynamics. 
self.md_final = "%s-md.gro" % self.name - if 1: return - - #---- - # Below - #---- + # Name of the molecular dynamics trajectory. self.mdtraj = '%s-md.trr' % self.name - if verbose: logger.info("Production run finished, calculating properties...\n") - # Figure out dipoles - note we use g_dipoles and not the multipole_moments function. - self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % (self.name, self.name, self.name), stdin="System\n") - - # Figure out which energy terms need to be printed. - energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) - ekeep = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] - ekeep += ['Temperature', 'Volume', 'Density'] - - # Calculate deuterium order parameter for bilayer optimization. + # Call md_extract and return the prop_return dictionary (backward compatibility with old functionality.) + Extract = self.md_extract(OrderedDict([(i, 0) for i in ['potential', 'kinetic', 'dipole', 'components']])) + prop_return = {'Potentials': Extract['potential'], + 'Kinetics': Extract['kinetic'], + 'Dips': Extract['dipole'], + 'Ecomps': Extract['components']} + if pbc: + Extract1 = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) + prop_return['Rhos'] = Extract['density'] + prop_return['Volumes'] = Extract['volume'] if bilayer: n_snap = self.n_snaps(nsteps, 1000, timestep) Scds = self.calc_scd(n_snap, timestep) al_vars = ['Box-Y', 'Box-X'] - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(al_vars)) + self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % + (self.name, self.name), stdin="\n".join(al_vars)) Xs = [] Ys = [] for line in open("%s-md-energy-xy.xvg" % self.name): @@ -1213,38 +1211,150 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, Xs = np.array(Xs) Ys = np.array(Ys) Als = (Xs * Ys) / 64 - else: - Scds = 0 - Als = 0 + prop_return['Scds'] = 
Scds + prop_return['Als'] = Als - # Perform energy component analysis and return properties. - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) - ecomp = OrderedDict() - Rhos = [] - Volumes = [] - Kinetics = [] - Potentials = [] - for line in open("%s-md-energy.xvg" % self.name): - s = [float(i) for i in line.split()] - for i in range(len(ekeep) - 2): - val = s[i+1] - if ekeep[i] in ecomp: - ecomp[ekeep[i]].append(val) - else: - ecomp[ekeep[i]] = [val] - Rhos.append(s[-1]) - Volumes.append(s[-2]) - Rhos = np.array(Rhos) - Volumes = np.array(Volumes) - Potentials = np.array(ecomp['Potential']) - Kinetics = np.array(ecomp['Kinetic-En.']) - Dips = np.array([[float(i) for i in line.split()[1:4]] for line in open("%s-md-dip.xvg" % self.name)]) - Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) - # Initialized property dictionary. - prop_return = OrderedDict() - prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) if verbose: logger.info("Finished!\n") return prop_return + # ecomp = OrderedDict() + # for line in open("%s-md-energy.xvg" % self.name): + # s = [float(i) for i in line.split()][1:] + # for i, j in enumerate(ekeep): + # val = s[i] + # if j in ecomp: + # ecomp[j].append(s[i]) + # else: + # ecomp[j] = [s[i]] + # print ecomp.keys() + # Rhos.append(s[-1]) + # Volumes.append(s[-2]) + # Calculate deuterium order parameter for bilayer optimization. + # # Perform energy component analysis and return properties. 
+ # self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) + # ecomp = OrderedDict() + # Rhos = [] + # Volumes = [] + # Kinetics = [] + # Potentials = [] + # for line in open("%s-md-energy.xvg" % self.name): + # s = [float(i) for i in line.split()] + # for i in range(len(ekeep) - 2): + # val = s[i+1] + # if ekeep[i] in ecomp: + # ecomp[ekeep[i]].append(val) + # else: + # ecomp[ekeep[i]] = [val] + # Rhos.append(s[-1]) + # Volumes.append(s[-2]) + # Rhos = np.array(Rhos) + # Volumes = np.array(Volumes) + # Potentials = np.array(ecomp['Potential']) + # Kinetics = np.array(ecomp['Kinetic-En.']) + # Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) + # # Initialized property dictionary. + # prop_return = OrderedDict() + # prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) + # if verbose: logger.info("Finished!\n") + # return prop_return + + def md_extract(self, tsspec, verbose=True): + """ + Extract time series from the MD trajectory / energy file. + Since Gromacs can do so many things in a single call to + g_energy, we implement all the functionality in a single big + function (it can be split off later.) + + @param[in] tsspec Dictionary with tsnames : tsparams key/value + pairs. tsparams contains any extra information needed to + calculate the observable (e.g. atom indices in S_cd) but it + may also be None. + + @return answer Dictionary with tsnames : timeseries key/value pairs. + The leading dimension of the time series is always the sample axis. + """ + + if not hasattr(self, 'mdtraj') or not os.path.exists(self.mdtraj): + logger.error('Called the md_extract method without having an MD trajectory!') + raise RuntimeError + + if verbose: logger.info("Calculating properties...\n") + + Output = OrderedDict() + + # Figure out which energy terms need to be printed. 
+ energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) + """ + For reference the menu from g_energy may look like this. + + Select the terms you want from the following list by + selecting either (part of) the name or the number or a combination. + End your selection with an empty line or a zero. + ------------------------------------------------------------------- + 1 LJ-(SR) 2 Disper.-corr. 3 Coulomb-(SR) 4 Potential + 5 Kinetic-En. 6 Total-Energy 7 Temperature 8 Pres.-DC + 9 Pressure 10 Constr.-rmsd 11 Box-X 12 Box-Y + 13 Box-Z 14 Volume 15 Density 16 pV + 17 Enthalpy 18 Vir-XX 19 Vir-XY 20 Vir-XZ + 21 Vir-YX 22 Vir-YY 23 Vir-YZ 24 Vir-ZX + 25 Vir-ZY 26 Vir-ZZ 27 Pres-XX 28 Pres-XY + 29 Pres-XZ 30 Pres-YX 31 Pres-YY 32 Pres-YZ + 33 Pres-ZX 34 Pres-ZY 35 Pres-ZZ 36 #Surf*SurfTen + 37 Box-Vel-XX 38 Box-Vel-YY 39 Box-Vel-ZZ 40 T-System + 41 Lamb-System + """ + + # Term names that we want to get from g_energy. + ekeep = [] + # Save anything that comes before Total-Energy if doing an energy component analysis. + if 'components' in tsspec: + ecomp = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] + ekeep += ecomp[:] + # These are time series which can be directly copied from g_energy output. + copy_keys = {'energy' : 'Total-Energy', 'potential' : 'Potential', 'kinetic' : 'Kinetic-En.', + 'temperature' : 'Temperature', 'pressure' : 'Pressure', 'volume' : 'Volume', + 'density' : 'Density', 'pv' : 'pV'} + for i in copy_keys: + if i in tsspec and copy_keys[i] not in ekeep: + ekeep.append(copy_keys[i]) + # Area per lipid requires Box-X and Box-Y time series. + if 'al' in tsspec: + ekeep += ['Box-X', 'Box-Y'] + ekeep = list(set(ekeep)) + eksort = [] + for i in energyterms.keys(): + for j in ekeep: + if j not in energyterms.keys(): + logger.error('Energy term in ekeep %s is not present in edr file' % j) + raise RuntimeError + if i == j: eksort.append(j) + + # Perform energy component analysis and return properties. 
+ self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) + + DF = pd.DataFrame([[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)], columns=eksort, + index = pd.Index([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)], name='time')) + # Now take the output values from g_energy and allocate them into the Output dictionary. + for i in tsspec: + if i in copy_keys: + Output[i] = np.array(DF[copy_keys[i]]) + if 'components' in tsspec: + for i in ecomp: + Output[i] = np.array(DF[copy_keys[i]]) + # Area per lipid. + # HARD CODED NUMBER: number of lipid molecules! + if 'al' in tsspec: + Output['al'] = np.array(DF['Box-X'])*np.array(DF['Box-Y']) / 64 + + # Dipole moments; note we use g_dipoles and not the multipole_moments function. + if 'dipole' in tsspec: + self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % + (self.name, self.name, self.name), stdin="System\n") + Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] + for line in open("%s-md-dip.xvg" % self.name)]) + + printcool_dictionary(Output, title = 'Output') + return Output + def md(self, nsteps=0, nequil=0, verbose=False, deffnm=None, **kwargs): @@ -1520,3 +1630,4 @@ def __init__(self,options,tgt_opts,forcefield): self.scripts = ['gmxprefix.bash', 'md_one.py'] ## Initialize base class. 
super(Thermo_GMX,self).__init__(options,tgt_opts,forcefield) + From 537ed48c0125880648c048e0114e08e748100fbd Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 23 Apr 2014 16:16:16 -0700 Subject: [PATCH 22/25] Clean up --- src/gmxio.py | 73 ++++++++++------------------------------------------ 1 file changed, 14 insertions(+), 59 deletions(-) diff --git a/src/gmxio.py b/src/gmxio.py index 181ad4557..b48d1799a 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1193,69 +1193,16 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, 'Dips': Extract['dipole'], 'Ecomps': Extract['components']} if pbc: - Extract1 = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) - prop_return['Rhos'] = Extract['density'] - prop_return['Volumes'] = Extract['volume'] + Extract_ = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) + prop_return['Rhos'] = Extract_['density'] + prop_return['Volumes'] = Extract_['volume'] if bilayer: - n_snap = self.n_snaps(nsteps, 1000, timestep) - Scds = self.calc_scd(n_snap, timestep) - al_vars = ['Box-Y', 'Box-X'] - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % - (self.name, self.name), stdin="\n".join(al_vars)) - Xs = [] - Ys = [] - for line in open("%s-md-energy-xy.xvg" % self.name): - s = [float(i) for i in line.split()] - Xs.append(s[-1]) - Ys.append(s[-2]) - Xs = np.array(Xs) - Ys = np.array(Ys) - Als = (Xs * Ys) / 64 - prop_return['Scds'] = Scds - prop_return['Als'] = Als + Extract__ = self.md_extract(OrderedDict([(i, 0) for i in ['al', 'scd']])) + prop_return['Als'] = Extract__['al'] + prop_return['Scds'] = Extract__['scd'] if verbose: logger.info("Finished!\n") return prop_return - # ecomp = OrderedDict() - # for line in open("%s-md-energy.xvg" % self.name): - # s = [float(i) for i in line.split()][1:] - # for i, j in enumerate(ekeep): - # val = s[i] - # if j in ecomp: - # ecomp[j].append(s[i]) - # else: - # ecomp[j] = [s[i]] - # print ecomp.keys() - # 
Rhos.append(s[-1]) - # Volumes.append(s[-2]) - # Calculate deuterium order parameter for bilayer optimization. - # # Perform energy component analysis and return properties. - # self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) - # ecomp = OrderedDict() - # Rhos = [] - # Volumes = [] - # Kinetics = [] - # Potentials = [] - # for line in open("%s-md-energy.xvg" % self.name): - # s = [float(i) for i in line.split()] - # for i in range(len(ekeep) - 2): - # val = s[i+1] - # if ekeep[i] in ecomp: - # ecomp[ekeep[i]].append(val) - # else: - # ecomp[ekeep[i]] = [val] - # Rhos.append(s[-1]) - # Volumes.append(s[-2]) - # Rhos = np.array(Rhos) - # Volumes = np.array(Volumes) - # Potentials = np.array(ecomp['Potential']) - # Kinetics = np.array(ecomp['Kinetic-En.']) - # Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) - # # Initialized property dictionary. - # prop_return = OrderedDict() - # prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) - # if verbose: logger.info("Finished!\n") - # return prop_return def md_extract(self, tsspec, verbose=True): """ @@ -1340,11 +1287,19 @@ def md_extract(self, tsspec, verbose=True): if 'components' in tsspec: for i in ecomp: Output[i] = np.array(DF[copy_keys[i]]) + # Area per lipid. # HARD CODED NUMBER: number of lipid molecules! if 'al' in tsspec: Output['al'] = np.array(DF['Box-X'])*np.array(DF['Box-Y']) / 64 + # Deuterium order parameter. + # HARD CODED: atom names of lipid tails! + if 'scd' in tsspec: + n_snap = self.n_snaps(nsteps, 1000, timestep) + Scds = self.calc_scd(n_snap, timestep) + Output['scd'] = Scds + # Dipole moments; note we use g_dipoles and not the multipole_moments function. 
if 'dipole' in tsspec: self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % From 3445751a09dc5ef6ba51c14d48e2ee751ce15fcd Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 23 Apr 2014 16:41:43 -0700 Subject: [PATCH 23/25] Clean up --- src/data/md_one.py | 237 +++------------------------------------------ src/gmxio.py | 15 ++- src/thermo.py | 2 +- 3 files changed, 22 insertions(+), 232 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index 70f6256da..a1a4bfc7e 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -38,8 +38,6 @@ # Note: Only the simulation settings that vary across different # simulations in a target may be specified on the command line. parser = argparse.ArgumentParser() -parser.add_argument('simulation', type=str, - help='The simulation name (important; used in setting up)') parser.add_argument('-T', '--temp', '--temperature', dest='temperature', type=float, help='Simulation temperature, leave blank for constant energy') parser.add_argument('-P', '--pres', '--pressure', dest='pressure', type=float, @@ -104,19 +102,19 @@ def main(): """Usage: - (prefix.sh) md_one.py - -T, --temperature - -P, --pressure - -g, --grad (if gradients of output timeseries are desired) - -o, --outputs - -eq, --nequil - -md, --nsteps - -dt, --timestep - -nt, --interval + (prefix.sh) md_one.py -T, --temperature + -P, --pressure + -g, --grad (if gradients of output timeseries are desired) + -eq, --nequil + -md, --nsteps + -dt, --timestep + -sp, --sample + -nt, --threads + -min, --minimize This program is meant to be called automatically by ForceBalance - because most options are loaded from the 'forcebalance.p' input - file. + because most options are loaded from the 'forcebalance.p' + simulation file. """ @@ -146,233 +144,20 @@ def main(): h = Sim.h # Active parameters to differentiate pgrad = Sim.pgrad - # Create instances of the MD Engine objects. 
Engine = EngineClass(name=Sim.type, **Sim.EngOpts) - click() # Start timer. # This line runs the condensed phase simulation. Engine.molecular_dynamics(**Sim.MDOpts) logger.info("MD simulation took %.3f seconds\n" % click()) - # Extract properties. Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) - # Dump results to file logger.info("Writing final force field.\n") pvals = FF.make(mvals) - logger.info("Writing all simulation data to disk.\n") with wopen('md_result.p') as f: lp_dump(Results, f) - - sys.exit() - - # # Get the temperature. - # temperature = args.get('temperature', Fopts.get('temperature', None)) - # # Get the pressure. - # pressure = args.get('pressure', Fopts.get('pressure', None)) - # # - # nequil = args.get('nequil', Fopts.get('nequil')) - - #---- - # load some options from file - #---- - # Finite difference step size - h = Fopts['h'] - # Active parameters for taking the gradient - pgrad = Fopts['pgrad'] - # Name of the initial coordinate file - coords = Fopts['coords'] - # Base name of the initial coordinate file - cbase = os.path.splitext(coords)[0] - # Actually start to do stuff. 
- # Molecule object corresponding to - M = Molecule(coords) - - #---- - # Engine options - #---- - EngOpts = OrderedDict([("coords", coords), ("pbc", Fopts['pbc'])]) - if engname == "openmm": - if pbc: - EngOpts["platname"] = 'CUDA' - else: - EngOpts["platname"] = 'Reference' - # Force crash if asking for the CUDA platform and force_cuda option is on - # (because we don't want to inadvertently run using Reference platform) - if EngOpts["platname"] == 'CUDA' and Fopts['force_cuda']: - try: Platform.getPlatformByName('CUDA') - except: raise RuntimeError('Forcing failure because CUDA platform unavailable') - if threads > 1: - logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") - elif engname == "gromacs": - # Gromacs-specific options - EngOpts["gmxpath"] = Fopts["gmxpath"] - EngOpts["gmxsuffix"] = Fopts["gmxsuffix"] - EngOpts["gmx_top"] = Fopts["gmx_top"] - EngOpts["gmx_mdp"] = Fopts["gmx_mdp"] - if Fopts['force_cuda']: logger.warn("force_cuda option has no effect on Gromacs engine.") - if Fopts['rpmd_beads'] > 0: raise RuntimeError("Gromacs cannot handle RPMD.") - if Fopts['mts']: logger.warn("Gromacs not configured for multiple timestep integrator.") - if Fopts['anisotropic']: logger.warn("Gromacs not configured for anisotropic box scaling.") - elif engname == "tinker": - EngOpts["tinkerpath"] = Fopts["tinkerpath"] - EngOpts["tinker_key"] = Fopts["tinker_key"] - - # if Fopts['threads'] > 1: - printcool_dictionary(EngOpts, title="Engine options") - - - - # Number of threads, multiple timestep integrator, anisotropic box etc. 
- # threads = Fopts.get('md_threads', 1) - # mts = Fopts.get('mts_integrator', 0) - # rpmd_beads = Fopts.get('rpmd_beads', 0) - # force_cuda = Fopts.get('force_cuda', 0) - # nbarostat = Fopts.get('n_mcbarostat', 25) - # anisotropic = Fopts.get('anisotropic_box', 0) - # minimize = Fopts.get('minimize_energy', 1) - sys.exit() - - #---- - # Setting up MD simulations - #---- - - - EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) - GenOpts = OrderedDict([('FF', FF)]) - if engname == "openmm": - # OpenMM-specific options - EngOpts["liquid"]["platname"] = 'CUDA' - EngOpts["gas"]["platname"] = 'Reference' - if force_cuda: - try: Platform.getPlatformByName('CUDA') - except: raise RuntimeError('Forcing failure because CUDA platform unavailable') - if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") - elif engname == "gromacs": - # Gromacs-specific options - GenOpts["gmxpath"] = Fopts["gmxpath"] - GenOpts["gmxsuffix"] = Fopts["gmxsuffix"] - EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" - EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" - EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" - EngOpts["gas"]["gmx_mdp"] = os.path.splitext(gas_fnm)[0] + ".mdp" - if force_cuda: logger.warn("force_cuda option has no effect on Gromacs engine.") - if rpmd_beads > 0: raise RuntimeError("Gromacs cannot handle RPMD.") - if mts: logger.warn("Gromacs not configured for multiple timestep integrator.") - if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") - elif engname == "tinker": - # Tinker-specific options - GenOpts["tinkerpath"] = Fopts["tinkerpath"] - EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" - EngOpts["gas"]["tinker_key"] = os.path.splitext(gas_fnm)[0] + ".key" - if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") - if rpmd_beads > 0: raise 
RuntimeError("TINKER cannot handle RPMD.") - if mts: logger.warn("Tinker not configured for multiple timestep integrator.") - EngOpts["liquid"].update(GenOpts) - EngOpts["gas"].update(GenOpts) - for i in EngOpts: - printcool_dictionary(EngOpts[i], "Engine options for %s" % i) - - # Set up MD options - # These are used in the function call to molecular_dynamics() - - MDOpts["liquid"] = OrderedDict([("nsteps", liquid_nsteps), ("timestep", liquid_timestep), - ("temperature", temperature), ("pressure", pressure), - ("nequil", liquid_nequil), ("minimize", minimize), - ("nsave", int(1000 * liquid_intvl / liquid_timestep)), - ("verbose", True), ('save_traj', Fopts['save_traj']), - ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), - ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) - MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), - ("temperature", temperature), ("nsave", int(1000 * gas_intvl / gas_timestep)), - ("nequil", gas_nequil), ("minimize", minimize), ("threads", 1), ("mts", mts), - ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) - - - engines = [] - ## Setup and carry out simulations in chain - for i in range(args.length): - # Simulation files - if engname == "gromacs": - ndx_flag = False - coords = args.name + str(i+1) + ".gro" - top_file = args.name + str(i+1) + ".top" - mdp_file = args.name + str(i+1) + ".mdp" - ndx_file = args.name + str(i+1) + ".ndx" - if os.path.exists(ndx_file): - ndx_flag = True - - mol = Molecule(coords) - #---- - # Set coordinates and molecule for engine - #---- - EngOpts = OrderedDict([("FF", FF), - ("pbc", True), - ("coords", coords), - ("mol", mol)]) - - if engname == "gromacs": - # Gromacs-specific options - EngOpts["gmx_top"] = top_file - EngOpts["gmx_mdp"] = mdp_file - if ndx_flag: - EngOpts["gmx_ndx"] = ndx_file - - printcool_dictionary(EngOpts) - - # Create engine objects and store them for subsequent analysis. 
- s = Engine(name=args.name+str(i+1), **EngOpts) - - #=====================# - # Run the simulation. # - #=====================# - MDOpts = OrderedDict([("nsteps", args.nsteps), - ("nequil", args.nequil)]) - - printcool("Molecular dynamics simulation", color=4, bold=True) - s.md(verbose=True, **MDOpts) - - engines.append(s) - - #======================================================================# - # Extract the quantities of interest from the MD simulations and dump # - # the results to file. # - # =====================================================================# - results = OrderedDict() - for q in args.quantities: - logger.info("Extracting %s...\n" % q) - - # Initialize quantity - objstr = "Quantity_" + q.capitalize() - dm = il.import_module('..quantity', - package='forcebalance.quantity') - - Quantity = getattr(dm, objstr)(engname, args.temperature, args.pressure) - - Q, Qerr, Qgrad = Quantity.extract(engines, FF, mvals, h, pgrad, AGrad) - - results.setdefault("values", []).append(Q) - results.setdefault("errors", []).append(Qerr) - results.setdefault("grads", []).append(Qgrad) - - logger.info("Finished!\n") - - # Print out results for the quantity and its derivative. 
- Sep = printcool(("%s: % .4f +- % .4f \nAnalytic Derivative:" - % (q.capitalize(), Q, Qerr))) - FF.print_map(vals=Qgrad) - - # Dump results to file - logger.info("Writing final force field.\n") - pvals = FF.make(mvals) - - logger.info("Writing all simulation data to disk.\n") - with wopen('md_result.p') as f: - lp_dump((np.asarray(results["values"]), - np.asarray(results["errors"]), - np.asarray(results["grads"])), f) if __name__ == "__main__": main() diff --git a/src/gmxio.py b/src/gmxio.py index b48d1799a..d6a9d1732 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1192,7 +1192,7 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, 'Kinetics': Extract['kinetic'], 'Dips': Extract['dipole'], 'Ecomps': Extract['components']} - if pbc: + if self.pbc: Extract_ = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) prop_return['Rhos'] = Extract_['density'] prop_return['Volumes'] = Extract_['volume'] @@ -1226,8 +1226,6 @@ def md_extract(self, tsspec, verbose=True): if verbose: logger.info("Calculating properties...\n") - Output = OrderedDict() - # Figure out which energy terms need to be printed. energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) """ @@ -1278,15 +1276,23 @@ def md_extract(self, tsspec, verbose=True): # Perform energy component analysis and return properties. self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) + DF = pd.DataFrame([[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)], columns=eksort, index = pd.Index([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)], name='time')) + + + # Okay, I'm not completely pandas-crazy yet. + Output = OrderedDict() + # Now take the output values from g_energy and allocate them into the Output dictionary. 
for i in tsspec: if i in copy_keys: Output[i] = np.array(DF[copy_keys[i]]) if 'components' in tsspec: + Components = OrderedDict() for i in ecomp: - Output[i] = np.array(DF[copy_keys[i]]) + Components[i] = np.array(DF[i]) + Output['components'] = Components # Area per lipid. # HARD CODED NUMBER: number of lipid molecules! @@ -1307,7 +1313,6 @@ def md_extract(self, tsspec, verbose=True): Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] for line in open("%s-md-dip.xvg" % self.name)]) - printcool_dictionary(Output, title = 'Output') return Output diff --git a/src/thermo.py b/src/thermo.py index 56cdf7dd5..4447d81d7 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -792,7 +792,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(os.getcwd(), f)) # Put together the command. - cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.type)] + cmdlist = ['%s python md_one.py' % (self.mdpfx)] if temp != None: cmdlist.append('-T %g' % float(temp)) if pres != None: From 63cae2f168a790345f54b6e9b4bf8a48ac02dc72 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 23 Apr 2014 23:17:54 -0700 Subject: [PATCH 24/25] Density observable can calculate the density and gradient --- src/data/md_one.py | 4 ++ src/gmxio.py | 74 +++++++++++------------ src/nifty.py | 8 +++ src/observable.py | 101 ++++++++++++++------------------ src/thermo.py | 143 +++++++++++++++++++++++++++++++-------------- 5 files changed, 187 insertions(+), 143 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index a1a4bfc7e..e71f9c3bc 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -25,6 +25,7 @@ from forcebalance.nifty import lp_dump, lp_load, wopen from forcebalance.nifty import printcool, printcool_dictionary from forcebalance.molecule import Molecule +from forcebalance.thermo import energy_derivatives from collections import OrderedDict @@ -152,6 +153,9 @@ def main(): logger.info("MD 
simulation took %.3f seconds\n" % click()) # Extract properties. Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) + # Calculate energy and dipole derivatives if needed. + if AGrad: + Results['derivatives'] = energy_derivatives(Engine, FF, mvals, h, pgrad, dipole='dipole' in Sim.timeseries.keys()) # Dump results to file logger.info("Writing final force field.\n") pvals = FF.make(mvals) diff --git a/src/gmxio.py b/src/gmxio.py index d6a9d1732..c651df3b1 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1187,34 +1187,29 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, self.mdtraj = '%s-md.trr' % self.name # Call md_extract and return the prop_return dictionary (backward compatibility with old functionality.) - Extract = self.md_extract(OrderedDict([(i, 0) for i in ['potential', 'kinetic', 'dipole', 'components']])) - prop_return = {'Potentials': Extract['potential'], - 'Kinetics': Extract['kinetic'], - 'Dips': Extract['dipole'], - 'Ecomps': Extract['components']} - if self.pbc: - Extract_ = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) - prop_return['Rhos'] = Extract_['density'] - prop_return['Volumes'] = Extract_['volume'] - if bilayer: - Extract__ = self.md_extract(OrderedDict([(i, 0) for i in ['al', 'scd']])) - prop_return['Als'] = Extract__['al'] - prop_return['Scds'] = Extract__['scd'] - - if verbose: logger.info("Finished!\n") + old_map = {'potential' : 'Potentials', 'kinetic' : 'Kinetics', 'dipole' : 'Dips', 'components' : 'Ecomps', + 'density' : 'Rhos', 'volume' : 'Volumes', 'al' : 'Als', 'scd' : 'Scds'} + tsnames = ['potential', 'kinetic', 'dipole', 'components'] + if self.pbc: tsnames += ['density', 'volume'] + if bilayer: tsnames += ['al', 'scd'] + Extract = self.md_extract(tsnames) + prop_return = OrderedDict([(old_map[i], Extract[i]) for i in Extract.keys() if i in old_map]) return prop_return - def md_extract(self, tsspec, verbose=True): + def md_extract(self, 
tsnames, tsspec={}, verbose=True): """ Extract time series from the MD trajectory / energy file. Since Gromacs can do so many things in a single call to g_energy, we implement all the functionality in a single big function (it can be split off later.) + @param[in] tsnames List of tsnames, containing names of + timeseries that need to be evaluated. + @param[in] tsspec Dictionary with tsnames : tsparams key/value pairs. tsparams contains any extra information needed to calculate the observable (e.g. atom indices in S_cd) but it - may also be None. + isn't strictly required. @return answer Dictionary with tsnames : timeseries key/value pairs. The leading dimension of the time series is always the sample axis. @@ -1251,7 +1246,7 @@ def md_extract(self, tsspec, verbose=True): # Term names that we want to get from g_energy. ekeep = [] # Save anything that comes before Total-Energy if doing an energy component analysis. - if 'components' in tsspec: + if 'components' in tsnames: ecomp = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] ekeep += ecomp[:] # These are time series which can be directly copied from g_energy output. @@ -1259,10 +1254,10 @@ def md_extract(self, tsspec, verbose=True): 'temperature' : 'Temperature', 'pressure' : 'Pressure', 'volume' : 'Volume', 'density' : 'Density', 'pv' : 'pV'} for i in copy_keys: - if i in tsspec and copy_keys[i] not in ekeep: + if i in tsnames and copy_keys[i] not in ekeep: ekeep.append(copy_keys[i]) # Area per lipid requires Box-X and Box-Y time series. - if 'al' in tsspec: + if 'al' in tsnames: ekeep += ['Box-X', 'Box-Y'] ekeep = list(set(ekeep)) eksort = [] @@ -1276,45 +1271,44 @@ def md_extract(self, tsspec, verbose=True): # Perform energy component analysis and return properties. 
self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) - - DF = pd.DataFrame([[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)], columns=eksort, - index = pd.Index([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)], name='time')) + tarray = np.array([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)]) + times = pd.Index(tarray, name='time') + xvgdata = [[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)] + xvgdf = pd.DataFrame(xvgdata, columns=eksort, index = times) - # Okay, I'm not completely pandas-crazy yet. + # Attempt to use Pandas more effectively. Output = OrderedDict() + Output['time'] = tarray # Now take the output values from g_energy and allocate them into the Output dictionary. - for i in tsspec: + for i in tsnames: if i in copy_keys: - Output[i] = np.array(DF[copy_keys[i]]) - if 'components' in tsspec: - Components = OrderedDict() - for i in ecomp: - Components[i] = np.array(DF[i]) - Output['components'] = Components + Output[i] = np.array(xvgdf[copy_keys[i]]) + if 'components' in tsnames: + # Energy component analysis is a DataFrame. + Output['components'] = xvgdf[ecomp] # Area per lipid. # HARD CODED NUMBER: number of lipid molecules! - if 'al' in tsspec: - Output['al'] = np.array(DF['Box-X'])*np.array(DF['Box-Y']) / 64 + if 'al' in tsnames: + Output['al'] = np.array(xvgdf['Box-X'])*np.array(xvgdf['Box-Y']) / 64 # Deuterium order parameter. # HARD CODED: atom names of lipid tails! - if 'scd' in tsspec: + if 'scd' in tsnames: n_snap = self.n_snaps(nsteps, 1000, timestep) - Scds = self.calc_scd(n_snap, timestep) - Output['scd'] = Scds + Output['scd'] = self.calc_scd(n_snap, timestep) # Dipole moments; note we use g_dipoles and not the multipole_moments function. 
- if 'dipole' in tsspec: + if 'dipole' in tsnames: self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % (self.name, self.name, self.name), stdin="System\n") Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] - for line in open("%s-md-dip.xvg" % self.name)]) - + for line in open("%s-md-dip.xvg" % self.name)]) + + # We could convert it to a Panel if we wanted, but I'm not fully confident using it... return Output - def md(self, nsteps=0, nequil=0, verbose=False, deffnm=None, **kwargs): diff --git a/src/nifty.py b/src/nifty.py index d51999d1e..938b4dd2a 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -320,6 +320,14 @@ def flat(vec): """ return np.array(vec).reshape(-1) +def getval(dframe, col): + """ Extract the single non-NaN value from a column. """ + nnan = [i for i in dframe[col] if not isnpnan(i)] + if len(nnan) != 1: + logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) + raise RuntimeError + return nnan[0] + def monotonic(arr, start, end): # Make sure an array is monotonically decreasing from the start to the end. 
a0 = arr[start] diff --git a/src/observable.py b/src/observable.py index 2c6d7858e..23fb6a0cc 100644 --- a/src/observable.py +++ b/src/observable.py @@ -3,8 +3,9 @@ from forcebalance.finite_difference import fdwrap, f12d3p from forcebalance.molecule import Molecule -from forcebalance.nifty import col, flat, statisticalInefficiency -from forcebalance.nifty import printcool +from forcebalance.nifty import col, flat, getval +from forcebalance.nifty import printcool, statisticalInefficiency +from forcebalance.optimizer import Counter from collections import OrderedDict @@ -14,8 +15,7 @@ # method mean_stderr def mean_stderr(ts): """Return mean and standard deviation of a time series ts.""" - return np.mean(ts), \ - np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts)) + return np.mean(ts), np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts)) # method energy_derivatives def energy_derivatives(engine, FF, mvals, h, pgrad, length, AGrad=True): @@ -75,11 +75,12 @@ class Observable(object): Identifier for the observable that is specified in `observables` in Target options. """ - def __init__(self, source, name=None): + def __init__(self, source): # Reference data which can be useful in calculating the observable. + if 'temp' in source: self.temp = getval(source, 'temp') + if 'pres' in source: self.pres = getval(source, 'pres') self.Data = source[self.columns] - self.name = name if name is not None else "empty" - + def __str__(self): return "Observable = " + self.name.capitalize() + "; Columns = " + ', '.join(self.columns) @@ -114,6 +115,26 @@ def extract(self, engines, FF, mvals, h, AGrad=True): logger.error("Extract method not implemented in base class.\n") raise NotImplementedError + def aggregate(self, Sims, AGrad, cycle=None): + print self.name + if cycle == None: cycle = Counter() + # Different from the Results objects in the Simulation, this + # one is keyed by the simulation type then by the time series + # data type. 
+ self.TimeSeries = OrderedDict([(i, OrderedDict()) for i, j in self.requires.items()]) + for stype in self.requires: + for dtype in self.requires[stype]: + self.TimeSeries[stype][dtype] = np.concatenate([Sim.Results[cycle][dtype] for Sim in Sims if Sim.type == stype]) + if AGrad: + # Also aggregate the derivative information along the second axis (snapshot axis) + self.Derivatives = OrderedDict() + for stype in self.requires: + # The derivatives that we have may be obtained from the 'derivatives' data structure of the first Simulation + # that matches the required simulation type. + self.Derivatives[stype] = OrderedDict() + for dtype in [Sim.Results[cycle]['derivatives'].keys() for Sim in Sims if Sim.type == stype][0]: + self.Derivatives[stype][dtype] = np.concatenate([Sim.Results[cycle]['derivatives'][dtype] for Sim in Sims if Sim.type == stype], axis=1) + # class Observable_Density class Observable_Density(Observable): @@ -136,66 +157,30 @@ def __init__(self, source): self.columns = ['density'] super(Observable_Density, self).__init__(source) - def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): + def evaluate(self, AGrad): #==========================================# # Physical constants and local variables. # #==========================================# # Energies in kJ/mol and lengths in nanometers. kB = 0.008314472471220214 - kT = kB*self.temperature + kT = kB*self.temp Beta = 1.0/kT mBeta = -Beta - - #======================================================# - # Get simulation properties depending on the engines. # - #======================================================# - if self.engname == "gromacs": - # Default name - deffnm = os.path.basename(os.path.splitext(engines[0].mdene)[0]) - # What energy terms are there and what is their order - energyterms = engines[0].energy_termnames(edrfile="%s.%s" % (deffnm, "edr")) - # Grab energy terms to print and keep track of energy term order. 
- ekeep = ['Total-Energy', 'Potential', 'Kinetic-En.', 'Temperature'] - ekeep += ['Volume', 'Density'] - - ekeep_order = [key for (key, value) in - sorted(energyterms.items(), key=lambda (k, v) : v) - if key in ekeep] - - # Perform energy component analysis and return properties. - engines[0].callgmx(("g_energy " + - "-f %s.%s " % (deffnm, "edr") + - "-o %s-energy.xvg " % deffnm + - "-xvg no"), - stdin="\n".join(ekeep)) - - # Read data and store properties by grabbing columns in right order. - data = np.loadtxt("%s-energy.xvg" % deffnm) - Energy = data[:, ekeep_order.index("Total-Energy") + 1] - Potential = data[:, ekeep_order.index("Potential") + 1] - Kinetic = data[:, ekeep_order.index("Kinetic-En.") + 1] - Volume = data[:, ekeep_order.index("Volume") + 1] - Temperature = data[:, ekeep_order.index("Temperature") + 1] - Density = data[:, ekeep_order.index("Density") + 1] - - #============================================# - # Compute the potential energy derivatives. # - #============================================# - logger.info(("Calculating potential energy derivatives " + - "with finite difference step size: %f\n" % h)) - printcool("Initializing array to length %i" % len(Energy), - color=4, bold=True) - G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) - - #=========================================# - # Observable properties and derivatives. # - #=========================================# + phase = self.requires.keys()[0] + # Density time series. + Density = self.TimeSeries[phase]['density'] # Average and error. Rho_avg, Rho_err = mean_stderr(Density) - # Analytic first derivative. - Rho_grad = mBeta * (flat(np.mat(G) * col(Density)) / len(Density) \ - - np.mean(Density) * np.mean(G, axis=1)) - return Rho_avg, Rho_err, Rho_grad + Answer = OrderedDict() + Answer['mean'] = Rho_avg + Answer['stderr'] = Rho_err + if AGrad: + G = self.Derivatives[phase]['potential'] + # Analytic first derivative. 
+ Rho_grad = mBeta * (flat(np.matrix(G) * col(Density)) / len(Density) + - np.mean(Density) * np.mean(G, axis=1)) + Answer['grad'] = Rho_grad + return Answer class Liquid_Density(Observable_Density): def __init__(self, source): diff --git a/src/thermo.py b/src/thermo.py index 4447d81d7..76a20d1cf 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -12,12 +12,13 @@ from forcebalance.molecule import Molecule from forcebalance.observable import OMap from forcebalance.target import Target -from forcebalance.finite_difference import in_fd +from forcebalance.finite_difference import in_fd, fdwrap, f12d3p from forcebalance.nifty import flat, col, row, isint, isnpnan -from forcebalance.nifty import lp_dump, lp_load, wopen, _exec +from forcebalance.nifty import lp_dump, lp_load, wopen, _exec, getval from forcebalance.nifty import GoInto, LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary from forcebalance.nifty import getWorkQueue +from forcebalance.optimizer import Counter from collections import defaultdict, OrderedDict @@ -27,14 +28,6 @@ # print logger.parent.parent.handlers[0] # logger.parent.parent.handlers = [] -def getval(dframe, col): - """ Extract the single non-NaN value from a column. """ - nnan = [i for i in dframe[col] if not isnpnan(i)] - if len(nnan) != 1: - logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) - raise RuntimeError - return nnan[0] - class TextParser(object): """ Parse a text file. """ def __init__(self, fnm): @@ -436,6 +429,53 @@ def find_file(tgtdir, index, stype, sufs, iscrd, icn=0): raise RuntimeError return found, 0 if numbered else icn +def energy_derivatives(engine, FF, mvals, h, pgrad, dipole=False): + + """ + Compute the first and second derivatives of a set of snapshot + energies with respect to the force field parameters. + + This basically calls the finite difference subroutine on the + energy_driver subroutine also in this script. 
+ + In the future we may need to be more sophisticated with + controlling the quantities which are differentiated, but for + now this is okay.. + + @param[in] engine Engine object for calculating energies + @param[in] FF Force field object + @param[in] mvals Mathematical parameter values + @param[in] h Finite difference step size + @param[in] pgrad List of active parameters for differentiation + @param[in] dipole Switch for dipole derivatives. + @return G First derivative of the energies in a N_param x N_coord array + @return GDx First derivative of the box dipole moment x-component in a N_param x N_coord array + @return GDy First derivative of the box dipole moment y-component in a N_param x N_coord array + @return GDz First derivative of the box dipole moment z-component in a N_param x N_coord array + + """ + def single_point(mvals_): + FF.make(mvals_) + if dipole: + return engine.energy_dipole() + else: + return engine.energy() + + ED0 = single_point(mvals) + G = OrderedDict() + G['potential'] = np.zeros((FF.np, ED0.shape[0])) + if dipole: + G['dipole'] = np.zeros((FF.np, ED0.shape[0], 3)) + for i in pgrad: + logger.info("%i %s\r" % (i, (FF.plist[i] + " "*30))) + edg, _ = f12d3p(fdwrap(single_point,mvals,i),h,f0=ED0) + if dipole: + G['potential'][i] = edg[:,0] + G['dipole'][i] = edg[:,1:] + else: + G['potential'][i] = edg[:] + return G + class Thermo(Target): """ A target for fitting general experimental data sets. The source @@ -634,6 +674,8 @@ def floatcol(col): self.Data[col] = self.Data[col].astype(float) intcol('n_ic') + floatcol('temp') + floatcol('pres') # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. 
@@ -685,7 +727,7 @@ def initialize_observables(self): Objs = [] Reqs = [] for OClass in OMap[oname]: - OObj = OClass(self.Data) + OObj = OClass(self.Data.ix[index]) Reqs.append(OObj.requires.keys()) if all([i in self.SimNames for i in OObj.requires.keys()]): Objs.append(OObj) @@ -776,6 +818,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): for Sim in self.Simulations[index]: Sim.gradient = AGrad simd = os.path.join(os.getcwd(), index, Sim.name) + Sim.RunDirs[Counter()] = simd GoInto(simd) # Submit or run the simulation if the result file does not exist. if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): @@ -807,7 +850,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # if wq == None: # logger.info("Running condensed phase simulation locally.\n") # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - _exec(cmdstr, copy_stderr=False, outfnm='md_one.out') + _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') # else: # queue_up(wq, command = cmdstr+' &> npt.out', # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], @@ -815,39 +858,6 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): os.chdir(cwd) return - def retrieve(self, dp): - """Retrieve the molecular dynamics (MD) results and store the calculated - observables in the Point object dp. - - Parameters - ---------- - dp : Point - Store the calculated observables in this point. 
- - Returns - ------- - Nothing - - """ - abspath = os.path.join(os.getcwd(), '%d/md_result.p' % dp.idnr) - - if os.path.exists(abspath): - logger.info('Reading data from ' + abspath + '.\n') - - vals, errs, grads = lp_load(open(abspath)) - - dp.data["values"] = vals - dp.data["errors"] = errs - dp.data["grads"] = grads - - else: - msg = 'The file ' + abspath + ' does not exist so we cannot read it.\n' - logger.warning(msg) - - dp.data["values"] = np.zeros((len(self.observables))) - dp.data["errors"] = np.zeros((len(self.observables))) - dp.data["grads"] = np.zeros((len(self.observables), self.FF.np)) - def indicate(self): """Shows optimization state.""" return @@ -995,6 +1005,18 @@ def get(self, mvals, AGrad=True, AHess=True): Objective = 0.0 Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) + + # Retrieve simulation results. + for index in self.Indices: + for Sim in self.Simulations[index]: + Sim.retrieve() + + # Calculate observable values. + for oname in self.Observables.keys(): + for index in self.Indices: + self.Observables[oname][index].aggregate(self.Simulations[index], AGrad) + if oname == 'density': self.Observables[oname][index].evaluate(AGrad) + return { "X": Objective, "G": Gradient, "H": Hessian} for pt in self.points: @@ -1120,10 +1142,13 @@ def __init__(self, tgt, data, name, index, stype, icn, tsnames): self.Data = copy.deepcopy(data) # Type of the simulation (map to simulation settings) self.type = stype + # Root directory of the ForceBalance job + self.root = tgt.root # Locate the initial coordinate file and frame number. self.initial, self.iframe = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, tgt.crdsfx, True, icn) # The time series for the simulation. self.timeseries = OrderedDict([(i, []) for i in tsnames]) + self.timeseries['potential'] = [] # ALWAYS require the potential energy. # The file extension that the coordinate file will be written with. 
self.fext = os.path.splitext(self.initial)[1] # Auxiliary files to be copied to the current location prior to running the simulation. @@ -1139,6 +1164,10 @@ def __init__(self, tgt, data, name, index, stype, icn, tsnames): self.h = tgt.h # Active parameters to differentiate over. self.pgrad = tgt.pgrad + # List of ITERATION : directory pairs. + self.RunDirs = OrderedDict() + # List of ITERATION : result data structures. + self.Results = OrderedDict() pbc = Simulation.type_settings[self.type]['pbc'] @@ -1214,3 +1243,27 @@ def __str__(self): msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) return "\n".join(msg) + + def retrieve(self, cycle=None): + """Retrieve the molecular dynamics (MD) results and store the calculated + observables in the Simulation object. + + Parameters + ---------- + dp : Point + Store the calculated observables in this point. + + Returns + ------- + Nothing + + """ + if cycle == None: cycle = Counter() + + abspath = os.path.join(self.RunDirs[cycle], 'md_result.p') + if os.path.exists(abspath): + logger.info('Simulation %s reading data from ' % self.name + abspath.replace(self.root+'/', '') + ' .\n') + self.Results[cycle] = lp_load(open(abspath)) + else: + logger.warning('The file ' + abspath + ' does not exist so we cannot read it.\n') + self.Results[cycle] = None From 2af905ff5190d8d4ec74a6d60b7f0ebaa9a3f33b Mon Sep 17 00:00:00 2001 From: leeping Date: Tue, 29 Apr 2014 07:07:46 -0700 Subject: [PATCH 25/25] Fix occasional failure in topology building --- src/molecule.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/molecule.py b/src/molecule.py index f423ec254..0d69eaa81 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -1419,20 +1419,20 @@ def build_topology(self, sn=None, Fac=1.2): zidx = -1 for j in xgrd: xi = self.xyzs[sn][i][0] - if toppbc and xi < 0: xi += xmax - if toppbc and xi > xmax: xi -= 
xmax + while toppbc and xi < 0: xi += xmax + while toppbc and xi > xmax: xi -= xmax if xi < j: break xidx += 1 for j in ygrd: yi = self.xyzs[sn][i][1] - if toppbc and yi < 0: yi += ymax - if toppbc and yi > ymax: yi -= ymax + while toppbc and yi < 0: yi += ymax + while toppbc and yi > ymax: yi -= ymax if yi < j: break yidx += 1 for j in zgrd: zi = self.xyzs[sn][i][2] - if toppbc and zi < 0: zi += zmax - if toppbc and zi > zmax: zi -= zmax + while toppbc and zi < 0: zi += zmax + while toppbc and zi > zmax: zi -= zmax if zi < j: break zidx += 1 gasn[(xidx,yidx,zidx)].append(i)