From 2d0e90dd07997dbbe244387cf83ca8a26f95692a Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Mon, 31 Mar 2014 21:38:42 -0700 Subject: [PATCH 01/25] Begin new data table parsing --- src/thermo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/thermo.py b/src/thermo.py index 017586c78..e1f743a55 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,6 +1,7 @@ import os import errno import numpy as np +import pandas as pd from forcebalance.target import Target from forcebalance.finite_difference import in_fd From 9d939ac9c34f4c80f28862a9cbf3904e2efecfff Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Fri, 4 Apr 2014 01:30:40 -0700 Subject: [PATCH 02/25] Implemented tri-format parser (broke Thermo) --- src/parser.py | 3 +- src/thermo.py | 361 ++++++++++++++++-- .../targets/Lipid_MUL/lipidcol2a.txt | 5 + .../targets/Lipid_MUL/scd323.txt | 17 + .../targets/Lipid_MUL/scd333.txt | 17 + .../targets/Lipid_MUL/scd338.txt | 17 + .../targets/Lipid_MUL/scd353.txt | 17 + .../targets/Lipid_RIT/lipidcol1.txt | 68 ++++ .../targets/Lipid_SPC/lipidcol1.txt | 67 ++++ .../targets/Lipid_TAB/lipidcol1.txt | 65 ++++ .../targets/LiquidBromine/expset.txt | 4 +- .../targets/LiquidBromine_CSV/data.csv | 8 + .../targets/LiquidBromine_TAB/data.tab.txt | 8 + .../004_thermo_liquid_bromine/test_parse.in | 145 +++++++ 14 files changed, 773 insertions(+), 29 deletions(-) create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt create mode 100644 
studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv create mode 100644 studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt create mode 100644 studies/004_thermo_liquid_bromine/test_parse.in diff --git a/src/parser.py b/src/parser.py index 95343c86b..529753cfc 100644 --- a/src/parser.py +++ b/src/parser.py @@ -150,7 +150,7 @@ "gmx_top" : (None, -10, 'Gromacs .top files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'), "gmx_ndx" : (None, -10, 'Gromacs .ndx files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'), "tinker_key" : (None, -10, 'TINKER .key files. If not provided, will search for default.', 'Targets that use TINKER', 'TINKER'), - "expdata_txt" : ('expset.txt', 0, 'Text file containing experimental data.', 'Thermodynamic properties target', 'thermo'), + "source" : ('data.txt', 0, 'Text file containing source data (experimental data, parameters for observable models, weights).', 'Thermodynamic properties target', 'thermo'), "read" : (None, 50, 'Provide a temporary directory ".tmp" to read data from a previous calculation on the initial iteration (for instance, to restart an aborted run).', 'Liquid and Remote targets', 'Liquid, Remote'), }, 'allcaps' : {"type" : (None, 200, 'The type of fitting target, for instance AbInitio_GMX ; this must correspond to the name of a Target subclass.', 'All targets (important)' ,''), @@ -281,6 +281,7 @@ "gas_equ_steps" : "gas_eq_steps", "lipid_prod_steps" : "lipid_md_steps", "lipid_equ_steps" : "lipid_eq_steps", + "expdata_txt" : "source", } ## Listing of sections in the input file. 
diff --git a/src/thermo.py b/src/thermo.py index 14cd23629..351f9957f 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,7 +1,11 @@ import os +import re +import csv import errno import numpy as np import pandas as pd +import itertools +import cStringIO from forcebalance.target import Target from forcebalance.finite_difference import in_fd @@ -10,16 +14,284 @@ from forcebalance.nifty import LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary -from collections import OrderedDict +from collections import defaultdict, OrderedDict from forcebalance.output import getLogger logger = getLogger(__name__) -# + +class TextParser(object): + """ Parse a text file. """ + def __init__(self, fnm): + self.fnm = fnm + self.parse() + + def is_empty_line(self): + return all([len(fld.strip()) == 0 for fld in self.fields]) + + def is_comment_line(self): + return re.match('^[\'"]?#',self.fields[0].strip()) + + def process_header(self): + """ Function for setting more attributes using the header line, if needed. """ + self.headings = [i.strip() for i in self.fields[:]] + + def process_data(self): + """ Function for setting more attributes using the current line, if needed. """ + trow = [] + for ifld in range(len(self.headings)): + if ifld < len(self.fields): + trow.append(self.fields[ifld]) + else: + trow.append('') + return trow + + def sanity_check(self): + """ Extra sanity checks. """ + + def parse(self): + self.headings = [] # Fields in header line + meta = defaultdict(list) # Dictionary of metadata + found_header = 0 # Whether we found the header line + table = [] # List of data records + self.generate_splits() # Generate a list of records for each line. + self.ln = 0 # Current line number + for line, fields in zip(open(self.fnm).readlines(), self.splits): + # Set attribute so methods can use it. + self.fields = fields + # Skip over empty lines or comment lines. 
+ if self.is_empty_line(): + logger.info("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) + self.ln += 1 + continue + if self.is_comment_line(): + logger.info("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) + self.ln += 1 + continue + # Indicates metadata mode. + is_meta = 0 + # Indicates whether this is the header line. + is_header = 0 + # Split line by tabs. + for ifld, fld in enumerate(fields): + fld = fld.strip() + # Stop parsing when we encounter a comment line. + if re.match('^[\'"]?#',fld): break + # The first word would contain the name of the metadata key. + if ifld == 0: + mkey = fld + # Check if the first field is an equals sign (turn on metadata mode). + if ifld == 1: + # Activate metadata mode. + if fld == "=": + is_meta = 1 + # Otherwise, this is the header line. + elif not found_header: + is_header = 1 + found_header = 1 + # Read in metadata. + if ifld > 1 and is_meta: + meta[mkey].append(fld) + # Set field start, field end, and field content for the header. + if is_header: + logger.info("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) + self.process_header() + elif is_meta: + logger.info("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) + else: + # Build the row of data to be appended to the table. + # Loop through the fields in the header and inserts fields + # in the data line accordingly. Ignores trailing tabs/spaces. + logger.info("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) + table.append(self.process_data()) + self.ln += 1 + self.sanity_check() + printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) + self.metadata = meta + self.table = table + +class CSV_Parser(TextParser): + + """ + Parse a comma-separated file. This class is for all + source files that are .csv format (characterized by having the + same number of comma-separated fields in each line). Fields are + separated by commas but they may contain commas as well. 
+ + In contrast to the other formats, .csv MUST contain the same + number of commas in each line. .csv format is easily prepared + using Excel. + """ + + def __init__(self, fnm): + self.format = "comma-separated values (csv)" + super(CSV_Parser, self).__init__(fnm) + + def generate_splits(self): + with open(self.fnm, 'r') as f: self.splits = list(csv.reader(f)) + +class TAB_Parser(TextParser): + + """ + Parse a tab-delimited file. This function is called for all + source files that aren't csv and contain at least one tab. + Fields are separated by tabs and do not contain tabs. + + Tab-delimited format is easy to prepare using programs like Excel. + It is easier to read than .csv but represented differently by + different editors. + + Empty fields must still exist (represented using multiple tabs). + """ + + def __init__(self, fnm): + self.format = "tab-delimited text" + super(TAB_Parser, self).__init__(fnm) + + def generate_splits(self): + self.splits = [line.split('\t') for line in open(self.fnm).readlines()] + +class FIX_Parser(TextParser): + + """ + Parse a fixed width format file. This function is called for all + source files that aren't csv and contain no tabs. + + Fixed width is harder to prepare by hand but easiest to read, + because it looks the same in all text editors. The field width is + determined by the header line (first line in the data table), + i.e. the first non-empty, non-comment, non-metadata line. + + Empty fields need to be filled with the correct number of spaces. + All fields must have the same alignment (left or right). The + start and end of each field is determined from the header line and + used to determine alignment. If the alignment cannot be determined + then it will throw an error. 
+ + Example of a left-aligned fixed width file: + + T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 + 323.15 1 0.631 1 C15 C34 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 + + """ + + def __init__(self, fnm): + self.format = "fixed-width text" + self.fbegs_dat = [] + self.fends_dat = [] + super(FIX_Parser, self).__init__(fnm) + + def generate_splits(self): + # This regular expression splits a string looking like this: + # "Density (kg m^-3) Hvap (kJ mol^-1) Alpha Kappa". But I + # don't want to split in these places: "Density_(kg_m^-3) + # Hvap_(kJ_mol^-1) Alpha Kappa" + allfields = [list(re.finditer('[^\s(]+(?:\s*\([^)]*\))?', line)) for line in open(self.fnm).readlines()] + self.splits = [] + # Field start / end positions for each line in the file + self.fbegs = [] + self.fends = [] + for line, fields in zip(open(self.fnm).readlines(), allfields): + self.splits.append([fld.group(0) for fld in fields]) + self.fbegs.append([fld.start() for fld in fields]) + self.fends.append([fld.end() for fld in fields]) + + def process_header(self): + super(FIX_Parser, self).process_header() + # Field start / end positions for the header line + self.hbeg = self.fbegs[self.ln] + self.hend = self.fends[self.ln] + + def process_data(self): + trow = [] + hbeg = self.hbeg + hend = self.hend + fbeg = self.fbegs[self.ln] + fend = self.fends[self.ln] + fields = self.fields + # Check alignment and throw an error if incorrectly formatted. 
+ if not ((set(fbeg).issubset(hbeg)) or (set(fend).issubset(hend))): + logger.error("This \x1b[91mdata line\x1b[0m is not aligned with the \x1b[92mheader line\x1b[0m!\n") + logger.error("\x1b[92m%s\x1b[0m\n" % header.replace('\n','')) + logger.error("\x1b[91m%s\x1b[0m\n" % line.replace('\n','')) + raise RuntimeError + # Left-aligned case + if set(fbeg).issubset(hbeg): + for hpos in hbeg: + if hpos in fbeg: + trow.append(fields[fbeg.index(hpos)]) + else: + trow.append('') + # Right-aligned case + if set(fend).issubset(hend): + for hpos in hend: + if hpos in fend: + trow.append(fields[fend.index(hpos)]) + else: + trow.append('') + # Field start / end positions for the line of data + self.fbegs_dat.append(fbeg[:]) + self.fends_dat.append(fend[:]) + return trow + + def sanity_check(self): + if set(self.hbeg).issuperset(set(itertools.chain(*self.fbegs_dat))): + self.format = "left-aligned fixed width text" + elif set(self.hend).issuperset(set(itertools.chain(*self.fends_dat))): + self.format = "right-aligned fixed width text" + else: + # Sanity check - it should never get here unless the parser is incorrect. + raise RuntimeError("Fixed-width format detected but columns are neither left-aligned nor right-aligned!") + +def parse1(fnm): + + """Determine the format of the source file and call the + appropriate parsing function.""" + + # CSV files have the same number of comma separated fields in every line, they are the simplest to parse. + with open(fnm, 'r') as f: csvf = list(csv.reader(f)) + if len(csvf[0]) > 1 and len(set([len(i) for i in csvf])) == 1: + return CSV_Parser(fnm) + + # Strip away comments and empty lines. + nclines = [re.sub('[ \t]*#.*$','',line) for line in open(fnm).readlines() + if not (line.strip().startswith("#") or not line.strip())] + + # Print the sanitized lines to a new file object. + # Note the file object needs ot be rewound every time we read or write to it. 
+ fdat = cStringIO.StringIO() + for line in nclines: + print >> fdat, line, + fdat.seek(0) + + # Now the file can either be tab-delimited or fixed width. + # If ANY tabs are found in the sanitized lines, then it is taken to be + # a tab-delimited file. + have_tabs = any(['\t' in line for line in fdat.readlines()]) ; fdat.seek(0) + if have_tabs: + return TAB_Parser(fnm) + else: + return FIX_Parser(fnm) + return + class Thermo(Target): """ A target for fitting general experimental data sets. The - experimental data is described in a .txt file and is handled with a - `Quantity` subclass. + source data is described in a .txt file. """ def __init__(self, options, tgt_opts, forcefield): @@ -27,8 +299,8 @@ def __init__(self, options, tgt_opts, forcefield): super(Thermo, self).__init__(options, tgt_opts, forcefield) ## Parameters - # Reference experimental data - self.set_option(tgt_opts, "expdata_txt", forceprint=True) + # Source data (experimental data, model parameters and weights) + self.set_option(tgt_opts, "source", forceprint=True) # Quantities to calculate self.set_option(tgt_opts, "quantities", forceprint=True) # Length of simulation chain @@ -48,31 +320,68 @@ def __init__(self, options, tgt_opts, forcefield): # Weights for quantities self.weights = {} - ## Read experimental data and initialize points - self._read_expdata(os.path.join(self.root, - self.tgtdir, - self.expdata_txt)) + ## Read source data and initialize points + self.read_source(os.path.join(self.root, self.tgtdir, self.source)) ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - def _read_expdata(self, expdata): - """Read and store experimental data. + def read_source(self, source): + """Read and store source data. Parameters ---------- - expdata : string - Read experimental data from this filename. + source : string + Read source data from this filename. 
Returns ------- Nothing """ - fp = open(expdata) + + parser = parse1(source) + print parser.headings + printcool_dictionary(parser.metadata, title="Metadata") + # print parser.table + revised_headings = [] + obs = '' + def error_left(i): + logger.error('Encountered heading %s but there is no observable to the left\n' % i) + raise RuntimeError + + for head in parser.headings: + usplit = re.split(' *\(', head, maxsplit=1) + if len(usplit) > 1: + hfirst = usplit[0] + punit = re.sub('\)$','',usplit[1].strip()) + print "header", head, "split into", hfirst, ",", punit + else: + hfirst = head + punit = '' + newh = hfirst + if head.lower() in ['w', 'wt', 'wts']: + if obs == '': error_left(head) + newh = obs + '_' + hfirst + elif head.lower() in ['s', 'sig', 'sigma']: + if obs == '': error_left(head) + newh = obs + '_' + hfirst + elif head.lower() in ['idx']: + if obs == '': error_left(head) + newh = obs + '_' + hfirst + else: + obs = hfirst + if newh != hfirst: + print "header", head, "renamed to", newh + + raw_input() + + return + fp = open(expdata) + line = fp.readline() foundHeader = False names = None @@ -80,32 +389,32 @@ def _read_expdata(self, expdata): label_header = None label_unit = None count = 0 + metadata = {} while line: # Skip comments and blank lines if line.lstrip().startswith("#") or not line.strip(): line = fp.readline() continue - + # Metadata is denoted using if "=" in line: # Read variable param, value = line.split("=") param = param.strip().lower() - if param == "denoms": - for e, v in enumerate(value.split()): - self.denoms[self.quantities[e]] = float(v) - elif param == "weights": - for e, v in enumerate(value.split()): - self.weights[self.quantities[e]] = float(v) + metadata[param] = value + # if param == "denoms": + # for e, v in enumerate(value.split()): + # self.denoms[self.quantities[e]] = float(v) + # elif param == "weights": + # for e, v in enumerate(value.split()): + # self.weights[self.quantities[e]] = float(v) elif foundHeader: # Read exp 
data count += 1 vals = line.split() - label = (vals[0], label_header, label_unit) refs = np.array(vals[1:-2:2]).astype(float) wts = np.array(vals[2:-2:2]).astype(float) temperature = float(vals[-2]) pressure = None if vals[-1].lower() == "none" else \ float(vals[-1]) - dp = Point(count, label=label, refs=refs, weights=wts, names=names, units=units, temperature=temperature, pressure=pressure) @@ -114,12 +423,10 @@ def _read_expdata(self, expdata): foundHeader = True headers = zip(*[tuple(h.split("_")) for h in line.split() if h != "w"]) - label_header = list(headers[0])[0] label_unit = list(headers[1])[0] names = list(headers[0][1:-2]) units = list(headers[1][1:-2]) - line = fp.readline() def retrieve(self, dp): @@ -214,6 +521,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): def indicate(self): """Shows optimization state.""" + return AGrad = hasattr(self, 'Gp') PrintDict = OrderedDict() @@ -358,6 +666,7 @@ def get(self, mvals, AGrad=True, AHess=True): Objective = 0.0 Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) + return { "X": Objective, "G": Gradient, "H": Hessian} for pt in self.points: # Update data point with MD results diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt new file mode 100644 index 000000000..8ba35c2d9 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt @@ -0,0 +1,5 @@ +T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt 
b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt new file mode 100644 index 000000000..57c1cfa5b --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.198144 C36 0.198144 +C18 0.198128 C37 0.198128 +C19 0.198111 C38 0.198111 +C20 0.198095 C39 0.198095 +C21 0.198079 C40 0.198079 +C22 0.197799 C41 0.197537 +C23 0.198045 C42 0.198046 +C24 0.178844 C43 0.178844 +C25 0.167527 C44 0.178565 +C26 0.148851 C45 0.16751 +C27 0.134117 C46 0.148834 +C28 0.119646 C47 0.1341 +C29 0.100969 C48 0.110956 +C30 0.07546 C49 0.087549 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 
+C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt new file mode 100644 index 000000000..31434af01 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.162535 C36 0.162535 +C18 0.162817 C37 0.162817 +C19 0.162535 C38 0.162535 +C20 0.162535 C39 0.162535 +C21 0.162817 C40 0.162817 +C22 0.151268 C41 0.151268 +C23 0.142254 C42 0.142254 +C24 0.127606 C43 0.127606 +C25 0.117465 C44 0.117465 +C26 0.101972 C45 0.117183 +C27 0.092676 C46 0.102535 +C28 0.081408 C47 0.092676 +C29 0.068732 C48 0.073239 +C30 0.051267 C49 0.056901 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt new file mode 100644 index 000000000..c26cf23d5 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt @@ -0,0 +1,68 @@ +metadata = 'Mao' + + T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 
0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 + diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt new file mode 100644 index 000000000..f2bbb57e1 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 
C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt new file mode 100644 index 000000000..0ec75d7af --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt @@ -0,0 +1,65 @@ +T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 
0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt index 8f211bdaa..3b653f242 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt @@ -1,7 +1,7 @@ # Experimental data for liquid bromine. 
- Temp_K Density_kg/m^3 w Enthalpy_kJ/mol w Temperature_K Pressure_bar - 298.15 3102.8 1.0 29.96 1.0 298.15 1.01325 + Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w + 298.15 1.01325 3102.8 1.0 29.96 1.0 # Variables: Denominators and weights for quantities Denoms = 30 0.3 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv new file mode 100644 index 000000000..847381612 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv @@ -0,0 +1,8 @@ +"# Experimental data for liquid, bromine.",,,,, +,,,,, +Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) +298.15,3102.8,1,29.96,1,1.01325 +,,,,, +# Variables: Denominators and weights for quantities,,,,, +Denoms,=,30,0.3,, +Weights,=,1.0,1.0,, diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt new file mode 100644 index 000000000..333f48bbb --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt @@ -0,0 +1,8 @@ +"# Experimental data for liquid, bromine." 
+ +Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) +298.15 3102.8 1 29.96 1 1.01325 + +# Variables: Denominators and weights for quantities +Denoms = 30 0.3 +Weights = 1 1 diff --git a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo_liquid_bromine/test_parse.in new file mode 100644 index 000000000..6e1a704df --- /dev/null +++ b/studies/004_thermo_liquid_bromine/test_parse.in @@ -0,0 +1,145 @@ +# ForceBalance input file generated by MakeInputFile.py + +# The octothorpe '#' is a comment symbol + +# Note: If the specified value is 'None' then the option will truly be set to +# None - not the string 'None' + +# Note: 'Section' option types are more complicated and may require you to read +# the documentation + +# Note: Boolean option types require no value, the key being present implies +# 'True' + +$options +# (string) Directory containing force fields, relative to project directory +ffdir forcefield + +# (string) Type of the penalty, L2 or L1 in the optimizer +penalty_type L2 + +# (allcap) The job type, defaults to a single-point evaluation of objective +# function +jobtype newton + +# (list) The names of force fields, corresponding to directory +# forcefields/file_name.(itp|gen) +forcefield bro.itp + +# (int) Maximum number of steps in an optimization +maxstep 100 + +# (float) Convergence criterion of step size (just needs to fall below this +# threshold) +convergence_step 0.05 + +# (float) Convergence criterion of objective function (in MainOptimizer this is +# the stdev of x2 over 10 steps) +convergence_objective 0.5 + +# (float) Convergence criterion of gradient norm +convergence_gradient 0.01 + +# (float) Minimum eigenvalue for applying steepest descent correction in the +# MainOptimizer +eig_lowerbound 0.01 + +# (float) Step size for finite difference derivatives in many functions +# (get_(G/H) in fitsim, FDCheckG) +finite_difference_h 0.001 + +# (float) Factor for multiplicative penalty function in objective function 
+penalty_additive 1.0 + +trust0 1.0 +mintrust 0.05 +error_tolerance 1.0 +adaptive_factor 0.2 +adaptive_damping 1.0 +normalize_weights no +print_hessian + +# Charge constraints are taken care of using "evals". +constrain_charge false +verbose_options false +backup false + +$end + +$target +name LiquidBromine +type Thermo_GMX +weight 1.0 +expdata_txt expset.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name LiquidBromine_CSV +type Thermo_GMX +weight 1.0 +expdata_txt data.csv +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name LiquidBromine_TAB +type Thermo_GMX +weight 1.0 +expdata_txt data.tab.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_SPC +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_RIT +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_TAB +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_MUL +type Thermo_GMX +weight 1.0 +expdata_txt lipidcol2a.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + From 902264c506bbbd242dfeafce454a73afcfff20fe Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Sun, 6 Apr 2014 09:37:25 -0700 Subject: [PATCH 03/25] Fix up exception handling --- src/PDB.py | 125 +++++++++------- src/abinitio.py | 27 ++-- src/binding.py | 6 +- src/contact.py | 18 ++- src/forcefield.py | 23 ++- src/gmxio.py | 27 ++-- src/interaction.py | 9 +- src/lipid.py | 30 ++-- src/liquid.py | 51 ++++--- src/molecule.py | 140 ++++++++++++------ src/moments.py | 3 +- src/nifty.py | 35 +++-- src/objective.py | 6 +- src/openmmio.py | 40 +++-- src/optimizer.py | 4 
+- src/output.py | 1 + src/parser.py | 6 +- src/quantity.py | 5 +- src/target.py | 36 +++-- src/thermo.py | 96 +++++++++--- src/tinkerio.py | 31 ++-- src/vibration.py | 9 +- .../targets/LiquidBromine/expset.txt | 4 +- .../004_thermo_liquid_bromine/test_parse.in | 14 +- 24 files changed, 489 insertions(+), 257 deletions(-) diff --git a/src/PDB.py b/src/PDB.py index 4e4564a6d..2d2e77e46 100644 --- a/src/PDB.py +++ b/src/PDB.py @@ -52,6 +52,10 @@ import copy ### PC import numpy as np +import forcebalance +from forcebalance.output import * +logger = getLogger(__name__) + class END: """ END class @@ -116,7 +120,7 @@ def __init__(self, line): self.numTer = toInt(string.strip(line[55:60])) self.numConect = toInt(string.strip(line[60:65])) self.numSeq = toInt(string.strip(line[65:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CONECT: @@ -172,7 +176,7 @@ def __init__(self, line): except ValueError: self.serial9 = None try: self.serial10 = toInt(string.strip(line[56:61])) except ValueError: self.serial10 = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ENDMDL: """ ENDMDL class @@ -219,7 +223,7 @@ def __init__(self, line): self.chainID = None self.resSeq = None self.iCode = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SIGUIJ: """ SIGUIJ class @@ -268,7 +272,7 @@ def __init__(self, line): self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ANISOU: @@ -318,7 +322,7 @@ def __init__(self, line): self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SIGATM: """ SIGATM class @@ -369,7 +373,7 @@ def __init__(self, line): 
self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HETATM: """ HETATM class @@ -438,7 +442,7 @@ def __init__(self,line,sybylType="A.aaa",lBonds=[],lBondedAtoms=[]): ### PC self.segID = "" self.element = "" self.charge = "" - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError def __str__(self): """ @@ -558,9 +562,11 @@ def read(self,file): # Do some error checking if start == -1: - raise Exception, "Unable to find '@ATOM' in MOL2 file!" + logger.error("Unable to find '@ATOM' in MOL2 file!\n") + raise RuntimeError elif stop == -1: - raise Exception, "Unable to find '@BOND' in MOL2 file!" + logger.error("Unable to find '@BOND' in MOL2 file!\n") + raise RuntimeError atoms = data[start+14:stop-2].split("\n") # BOND section @@ -569,7 +575,8 @@ def read(self,file): # More error checking if stop == -1: - raise Exception, "Unable to find '@SUBSTRUCTURE' in MOL2 file!" 
+ logger.error("Unable to find '@SUBSTRUCTURE' in MOL2 file!\n") + raise RuntimeError bonds = data[start+14:stop-1].split("\n") self.parseAtoms(atoms) @@ -586,7 +593,8 @@ def parseAtoms(self,AtomList): # Error checking if len(SeparatedAtomLine) < 8: - raise Exception, "Bad atom entry in MOL2 file: %s" % AtomLine + logger.error("Bad atom entry in MOL2 file: %s\n" % AtomLine) + raise RuntimeError fakeRecord = "HETATM" fakeChain = " L" @@ -598,7 +606,8 @@ def parseAtoms(self,AtomList): float(SeparatedAtomLine[2]),float(SeparatedAtomLine[3]), float(SeparatedAtomLine[4])) except ValueError: - raise Exception, "Bad atom entry in MOL2 file: %s" % AtomLine + logger.error("Bad atom entry in MOL2 file: %s\n" % AtomLine) + raise RuntimeError thisAtom = HETATM(mol2pdb, SeparatedAtomLine[5],[],[]) self.lPDBAtoms.append(mol2pdb) @@ -611,7 +620,8 @@ def parseBonds(self,BondList): for BondLine in BondList: SeparatedBondLine = BondLine.split() if len(SeparatedBondLine) < 4: - raise Exception, "Bad bond entry in MOL2 file: %s" % BondLine + logger.error("Bad bond entry in MOL2 file: %s\n" % BondLine) + raise RuntimeError try: thisBond = MOL2BOND( toInt(SeparatedBondLine[1]), # bond frm @@ -620,7 +630,8 @@ def parseBonds(self,BondList): toInt(SeparatedBondLine[0]) # bond id ) except ValueError: - raise Exception, "Bad bond entry in MOL2 file: %s" % BondLine + logger.error("Bad bond entry in MOL2 file: %s\n" % BondLine) + raise RuntimeError self.lBonds.append(thisBond) def createlBondedAtoms(self): @@ -714,7 +725,7 @@ def __init__(self, line): self.element = "" self.charge = "" else: - raise ValueError, record + logger.error(record+'\n') ; raise ValueError def __str__(self): """ @@ -807,7 +818,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "MODEL": self.serial = toInt(string.strip(line[10:14])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class TVECT: """ TVECT class @@ -835,7 +846,7 @@ def __init__(self, line): 
self.t2 = float(string.strip(line[20:30])) self.t3 = float(string.strip(line[30:40])) self.text = string.strip(line[40:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MTRIX3: """ MTRIX3 class @@ -868,7 +879,7 @@ def __init__(self, line): self.mn3 = float(string.strip(line[30:40])) self.vn = float(string.strip(line[45:55])) self.iGiven = toInt(string.strip(line[59])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MTRIX2: """ MTRIX2 class @@ -901,7 +912,7 @@ def __init__(self, line): self.mn3 = float(string.strip(line[30:40])) self.vn = float(string.strip(line[45:55])) self.iGiven = toInt(string.strip(line[59])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MTRIX1: """ MTRIX1 class @@ -936,7 +947,7 @@ def __init__(self, line): try: self.iGiven = toInt(string.strip(line[45:55])) except ValueError: self.iGiven = None except IndexError: self.iGiven = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SCALE3: """ SCALE3 class @@ -964,7 +975,7 @@ def __init__(self, line): self.sn2 = float(string.strip(line[20:30])) self.sn3 = float(string.strip(line[30:40])) self.un = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SCALE2: """ SCALE2 class @@ -992,7 +1003,7 @@ def __init__(self, line): self.sn2 = float(string.strip(line[20:30])) self.sn3 = float(string.strip(line[30:40])) self.un = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SCALE1: """ SCALE1 class @@ -1020,7 +1031,7 @@ def __init__(self, line): self.sn2 = float(string.strip(line[20:30])) self.sn3 = float(string.strip(line[30:40])) self.un = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ORIGX2: """ ORIGX2 
class @@ -1047,7 +1058,7 @@ def __init__(self, line): self.on2 = float(string.strip(line[20:30])) self.on3 = float(string.strip(line[30:40])) self.tn = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ORIGX3: """ ORIGX3 class @@ -1074,7 +1085,7 @@ def __init__(self, line): self.on2 = float(string.strip(line[20:30])) self.on3 = float(string.strip(line[30:40])) self.tn = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class ORIGX1: """ ORIGX1 class @@ -1101,7 +1112,7 @@ def __init__(self, line): self.on2 = float(string.strip(line[20:30])) self.on3 = float(string.strip(line[30:40])) self.tn = float(string.strip(line[45:55])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CRYST1: """ CRYST1 class @@ -1136,7 +1147,7 @@ def __init__(self, line): self.gamma = float(string.strip(line[47:54])) self.sGroup = string.strip(line[55:65]) self.z = toInt(string.strip(line[66:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SITE: @@ -1210,7 +1221,7 @@ def __init__(self, line): self.seq4 = toInt(string.strip(line[56:60])) try: self.iCode4 = string.strip(line[60]) except IndexError: self.iCode4 = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CISPEP: """ CISPEP field @@ -1251,7 +1262,7 @@ def __init__(self, line): self.icode2 = string.strip(line[35]) self.modNum = toInt(string.strip(line[43:46])) self.measure = float(string.strip(line[53:59])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SLTBRG: """ SLTBRG field @@ -1297,7 +1308,7 @@ def __init__(self, line): self.iCode2 = string.strip(line[56]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise 
ValueError class HYDBND: """ HYDBND field @@ -1354,7 +1365,7 @@ def __init__(self, line): self.ICode2 = string.strip(line[58]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class LINK: """ LINK field @@ -1402,7 +1413,7 @@ def __init__(self, line): self.iCode2 = string.strip(line[56]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SSBOND: @@ -1440,7 +1451,7 @@ def __init__(self, line): self.icode2 = string.strip(line[35]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class TURN: """ TURN field @@ -1489,7 +1500,7 @@ def __init__(self, line): self.endSeqNum = toInt(string.strip(line[31:35])) self.endICode = string.strip(line[35]) self.comment = string.strip(line[40:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SHEET: """ SHEET field @@ -1584,7 +1595,7 @@ def __init__(self, line): self.prevChainID = None self.prevResSeq = None self.prevICode = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HELIX: """ HELIX field @@ -1638,7 +1649,7 @@ def __init__(self, line): self.comment = string.strip(line[40:70]) try: self.length = toInt(string.strip(line[71:76])) except ValueError: self.length = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class FORMUL: """ FORMUL field @@ -1664,7 +1675,7 @@ def __init__(self, line): self.hetID = string.strip(line[12:15]) self.asterisk = string.strip(line[19]) self.text = string.strip(line[19:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HETSYN: """ HETSYN field @@ -1687,7 +1698,7 @@ def __init__(self, 
line): if record == "HETSYN": self.hetID = string.strip(line[11:14]) self.hetSynonyms = string.strip(line[15:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HETNAM: """ HETNAM field @@ -1709,7 +1720,7 @@ def __init__(self, line): if record == "HETNAM": self.hetID = string.strip(line[11:14]) self.text = string.strip(line[15:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HET: """ HET field @@ -1749,7 +1760,7 @@ def __init__(self, line): self.iCode = string.strip(line[17]) self.numHetAtoms = toInt(string.strip(line[20:25])) self.text = string.strip(line[30:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class MODRES: """ MODRES field @@ -1783,7 +1794,7 @@ def __init__(self, line): string.iCode = string.strip(line[22]) string.stdRes = string.strip(line[24:27]) string.comment = string.strip(line[29:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SEQRES: """ SEQRES field @@ -1840,7 +1851,7 @@ def __init__(self, line): self.resName.append(string.strip(line[59:62])) self.resName.append(string.strip(line[63:66])) self.resName.append(string.strip(line[67:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SEQADV: """ SEQADV field @@ -1886,7 +1897,7 @@ def __init__(self, line): try: self.dbSeq = toInt(string.strip(line[43:48])) except ValueError: self.dbSeq = None self.conflict = string.strip(line[49:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class DBREF: """ DBREF field @@ -1951,7 +1962,7 @@ def __init__(self, line): self.dbseqEnd = toInt(string.strip(line[62:67])) try: self.dbinsEnd = string.strip(line[67]) except IndexError: self.dbinsEnd = None - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class REMARK: """ REMARK field @@ -2022,7 +2033,7 @@ def 
__init__(self, line): record = string.strip(line[0:6]) if record == "JRNL": self.text = string.strip(line[12:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SPRSDE: """ SPRSDE field @@ -2064,7 +2075,7 @@ def __init__(self, line): self.sIdCodes.append(string.strip(line[56:60])) self.sIdCodes.append(string.strip(line[61:65])) self.sIdCodes.append(string.strip(line[66:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class REVDAT: """ REVDAT field @@ -2106,7 +2117,7 @@ def __init__(self, line): self.records.append(string.strip(line[46:52])) self.records.append(string.strip(line[53:59])) self.records.append(string.strip(line[60:66])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class AUTHOR: """ AUTHOR field @@ -2127,7 +2138,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "AUTHOR": self.authorList = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class EXPDTA: """ EXPDTA field @@ -2158,7 +2169,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "EXPDTA": self.technique = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class KEYWDS: """ KEYWDS field @@ -2183,7 +2194,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "KEYWDS": self.keywds = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class SOURCE: """ SOURCE field @@ -2207,7 +2218,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "SOURCE": self.source = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class COMPND: @@ -2237,7 +2248,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "COMPND": self.compound = 
string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class CAVEAT: """ CAVEAT field @@ -2260,7 +2271,7 @@ def __init__(self, line): if record == "CAVEAT": self.idCode = string.strip(line[11:15]) self.comment = string.strip(line[19:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class TITLE: """ TITLE field @@ -2281,7 +2292,7 @@ def __init__(self, line): record = string.strip(line[0:6]) if record == "TITLE": self.title = string.strip(line[10:70]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class OBSLTE: """ OBSLTE field @@ -2332,7 +2343,7 @@ def __init__(self, line): self.rIdCodes.append(string.strip(line[56:60])) self.rIdCodes.append(string.strip(line[61:65])) self.rIdCodes.append(string.strip(line[67:70])) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError class HEADER: """ HEADER field @@ -2359,7 +2370,7 @@ def __init__(self, line): self.classification = string.strip(line[10:50]) self.depDate = string.strip(line[50:59]) self.IDcode = string.strip(line[62:66]) - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError def readAtom(line): """ @@ -2392,7 +2403,7 @@ def readAtom(line): self.segID = 0 self.element = 0 self.charge = 0 - else: raise ValueError, record + else: logger.error(record+'\n') ; raise ValueError """ # Try to find 5 consecutive floats diff --git a/src/abinitio.py b/src/abinitio.py index c30a8af9a..b6844cd51 100644 --- a/src/abinitio.py +++ b/src/abinitio.py @@ -210,7 +210,8 @@ def compute_netforce_torque(self, xyz, force, QM=False): elif self.force_map == 'chargegroup' and 'ChargeGroupNumber' in self.AtomLists: Block = self.AtomLists['ChargeGroupNumber'] else: - raise Exception('force_map keyword "%s" is invalid. 
Please choose from: %s' % (self.force_map, ', '.join(['"%s"' % kwds[k] for k in self.AtomLists.keys() if k in kwds]))) + logger.error('force_map keyword "%s" is invalid. Please choose from: %s\n' % (self.force_map, ', '.join(['"%s"' % kwds[k] for k in self.AtomLists.keys() if k in kwds]))) + raise RuntimeError nft = self.fitatoms # Number of particles that the force is acting on @@ -225,14 +226,16 @@ def compute_netforce_torque(self, xyz, force, QM=False): mask = np.array([i for i in range(npr) if self.AtomMask[i]]) if nfp not in [npr, nat]: - raise RuntimeError('Force contains %i particles but expected %i or %i' % (nfp, npr, nat)) + logger.error('Force contains %i particles but expected %i or %i\n' % (nfp, npr, nat)) + raise RuntimeError elif nfp == nat: frc1 = force.reshape(-1,3)[:nft].flatten() elif nfp == npr: frc1 = force.reshape(-1,3)[mask][:nft].flatten() if nxp not in [npr, nat]: - raise RuntimeError('Coordinates contains %i particles but expected %i or %i' % (nfp, npr, nat)) + logger.error('Coordinates contains %i particles but expected %i or %i\n' % (nfp, npr, nat)) + raise RuntimeError elif nxp == nat: xyz1 = xyz[:nft] elif nxp == npr: @@ -377,7 +380,8 @@ def read_reference_data(self): self.emd0 -= np.mean(self.emd0) if self.whamboltz == True: if self.attenuate: - raise RuntimeError('whamboltz and attenuate are mutually exclusive') + logger.error('whamboltz and attenuate are mutually exclusive\n') + raise RuntimeError self.boltz_wts = np.array([float(i.strip()) for i in open(os.path.join(self.root,self.tgtdir,"wham-weights.txt")).readlines()]) # This is a constant pre-multiplier in front of every snapshot. 
bar = printcool("Using WHAM MM Boltzmann weights.", color=4) @@ -469,13 +473,15 @@ def energy_all(self): if hasattr(self, 'engine'): return self.engine.energy().reshape(-1,1) else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_all(self): if hasattr(self, 'engine'): return self.engine.energy_force() else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_transform(self): if self.force: @@ -498,13 +504,15 @@ def energy_one(self, i): if hasattr(self, 'engine'): return self.engine.energy_one(i) else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_one(self, i): if hasattr(self, 'engine'): return self.engine.energy_force_one(i) else: - raise NotImplementedError("Target must contain an engine object") + logger.error("Target must contain an engine object\n") + raise NotImplementedError def energy_force_transform_one(self,i): if self.force: @@ -1165,7 +1173,8 @@ def get(self, mvals, AGrad=False, AHess=False): for i in Answer_ESP: Answer[i] += w_resp * Answer_ESP[i] if not any([self.energy, self.force, self.resp]): - raise Exception("Ab initio fitting must have at least one of: Energy, Force, ESP") + logger.error("Ab initio fitting must have at least one of: Energy, Force, ESP\n") + raise RuntimeError if not in_fd(): self.objective = Answer['X'] return Answer diff --git a/src/binding.py b/src/binding.py index 7c0e9bb40..6bb974f4f 100644 --- a/src/binding.py +++ b/src/binding.py @@ -133,7 +133,8 @@ def __init__(self,options,tgt_opts,forcefield): self.inter_opts[inter][opt] = self.global_opts[opt] for inter in self.inter_opts: if 'energy_unit' in self.inter_opts[inter] and 
self.inter_opts[inter]['energy_unit'].lower() not in ['kilocalorie_per_mole', 'kilocalories_per_mole']: - raise RuntimeError('Usage of physical units is has been removed, please provide all binding energies in kcal/mole') + logger.error('Usage of physical units is has been removed, please provide all binding energies in kcal/mole\n') + raise RuntimeError self.inter_opts[inter]['reference_physical'] = self.inter_opts[inter]['energy'] if tgt_opts['energy_denom'] == 0.0: @@ -152,7 +153,8 @@ def __init__(self,options,tgt_opts,forcefield): if self.cauchy: logger.info("Each contribution to the interaction energy objective function will be scaled by 1.0 / ( denom**2 + reference**2 )\n") if self.attenuate: - raise Exception('attenuate and cauchy are mutually exclusive') + logger.error('attenuate and cauchy are mutually exclusive\n') + raise RuntimeError elif self.attenuate: logger.info("Repulsive interactions beyond energy_denom will be scaled by 1.0 / ( denom**2 + (reference-denom)**2 )\n") ## Build keyword dictionaries to pass to engine. 
diff --git a/src/contact.py b/src/contact.py index 6ca64e1a5..bb1f4eb68 100644 --- a/src/contact.py +++ b/src/contact.py @@ -29,15 +29,18 @@ def atom_distances(xyzlist, atom_contacts, box=None): # check shapes traj_length, num_atoms, num_dims = xyzlist.shape if not num_dims == 3: - raise ValueError("xyzlist must be an n x m x 3 array") + logger.error("xyzlist must be an n x m x 3 array\n") + raise ValueError try: num_contacts, width = atom_contacts.shape assert width is 2 except (AttributeError, ValueError, AssertionError): - raise ValueError('contacts must be an n x 2 array') + logger.error('contacts must be an n x 2 array\n') + raise ValueError if not np.all(np.unique(atom_contacts) < num_atoms): - raise ValueError('Atom contacts goes larger than num_atoms') + logger.error('Atom contacts goes larger than num_atoms\n') + raise ValueError # check type if xyzlist.dtype != np.float32: @@ -59,7 +62,8 @@ def atom_distances(xyzlist, atom_contacts, box=None): _contact_wrap.atomic_contact_wrap(xyzlist, atom_contacts, results) else: if box.shape != (3,): - raise ValueError('box must be a 3-element array') + logger.error('box must be a 3-element array\n') + raise ValueError if box.dtype != np.float32: box = np.float32(box) # make sure contiguous @@ -97,12 +101,14 @@ def residue_distances(xyzlist, residue_membership, residue_contacts): traj_length, num_atoms, num_dims = xyzlist.shape if not num_dims == 3: - raise ValueError("xyzlist must be n x m x 3") + logger.error("xyzlist must be n x m x 3\n") + raise ValueError try: num_contacts, width = residue_contacts.shape assert width is 2 except (AttributeError, ValueError, AssertionError): - raise ValueError('residue_contacts must be an n x 2 array') + logger.error('residue_contacts must be an n x 2 array\n') + raise ValueError # check type if xyzlist.dtype != np.float32: diff --git a/src/forcefield.py b/src/forcefield.py index b49c1b46d..7db442a39 100644 --- a/src/forcefield.py +++ b/src/forcefield.py @@ -179,7 +179,8 @@ def 
__missing__(self, key): try: return self.backup_dict[self['AtomType']][key] except: - raise KeyError('The key %s does not exist as an atom attribute or as an atom type attribute!' % key) + logger.error('The key %s does not exist as an atom attribute or as an atom type attribute!\n' % key) + raise KeyError class FF(forcebalance.BaseClass): """ Force field class. @@ -477,7 +478,7 @@ def addff_txt(self, ffname, fftype): for k in kwds: if sline.count(k) > 1: logger.error(line) - logger.error("The above line contains multiple occurrences of the keyword %s" % k) + logger.error("The above line contains multiple occurrences of the keyword %s\n" % k) raise RuntimeError elif sline.count(k) == 1: marks[k] = (np.array(sline) == k).argmax() @@ -531,7 +532,8 @@ def addff_txt(self, ffname, fftype): count += 1 sys.stderr.write("\nOffending ID: %s\n" % sline[parse+1]) - raise Exception('Parameter repetition entry in force field file is incorrect (see above)') + logger.error('Parameter repetition entry in force field file is incorrect (see above)\n') + raise RuntimeError pid = self.Readers[ffname].build_pid(pfld) self.map[pid] = prep # This repeated parameter ID also has these atoms involved. 
@@ -626,9 +628,11 @@ def make(self,vals=None,use_pvals=False,printdir=None,precision=12): """ if type(vals)==np.ndarray and vals.ndim != 1: - raise Exception('Please only pass 1-D arrays') + logger.error('Please only pass 1-D arrays\n') + raise RuntimeError if len(vals) != self.np: - raise Exception('Input parameter np.array (%i) not the required size (%i)' % (len(vals), self.np)) + logger.error('Input parameter np.array (%i) not the required size (%i)\n' % (len(vals), self.np)) + raise RuntimeError if use_pvals or self.use_pvals: logger.info("Using physical parameters directly!\r") pvals = vals.copy().flatten() @@ -684,7 +688,8 @@ def TXTFormat(number, precision): wval = eval(cmd.replace("PARM","PRM")) except: logger.error(traceback.format_exc() + '\n') - raise Exception("The command %s (written in the force field file) cannot be evaluated!" % cmd) + logger.error("The command %s (written in the force field file) cannot be evaluated!\n" % cmd) + raise RuntimeError else: wval = mult*pvals[i] if self.ffdata_isxml[fnm]: @@ -833,7 +838,8 @@ def create_pvals(self,mvals): pvals = np.exp(mvals.flatten()) * self.pvals0 except: logger.exception(mvals + '\n') - raise Exception('What the hell did you do?') + logger.error('What the hell did you do?\n') + raise RuntimeError else: pvals = flat(np.matrix(self.tmI)*col(mvals)) + self.pvals0 concern= ['polarizability','epsilon','VDWT'] @@ -859,7 +865,8 @@ def create_mvals(self,pvals): @return mvals The mathematical parameters """ if self.logarithmic_map: - raise Exception('create_mvals has not been implemented for logarithmic_map') + logger.error('create_mvals has not been implemented for logarithmic_map\n') + raise RuntimeError mvals = flat(invert_svd(self.tmI) * col(pvals - self.pvals0)) return mvals diff --git a/src/gmxio.py b/src/gmxio.py index 04e678a73..273438ada 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -73,7 +73,8 @@ def write_mdp(fout, options, fin=None, defaults={}, verbose=False): val = options[key] val0 = 
valf.strip() if key in clashes and val != val0: - raise RuntimeError("write_mdp tried to set %s = %s but its original value was %s = %s" % (key, val, key, val0)) + logger.error("write_mdp tried to set %s = %s but its original value was %s = %s\n" % (key, val, key, val0)) + raise RuntimeError # Passing None as the value causes the option to be deleted if val == None: continue if len(val) < len(valf): @@ -518,7 +519,8 @@ def setopts(self, **kwargs): warn_once("The 'gmxpath' option was not specified; using default.") if which('mdrun'+self.gmxsuffix) == '': warn_press_key("Please add GROMACS executables to the PATH or specify gmxpath.") - raise RuntimeError("Cannot find the GROMACS executables!") + logger.error("Cannot find the GROMACS executables!\n") + raise RuntimeError else: self.gmxpath = which('mdrun'+self.gmxsuffix) havegmx = True @@ -629,7 +631,8 @@ def prepare(self, pbc=False, **kwargs): if self.top != None and os.path.exists(self.top): LinkFile(self.top, '%s.top' % self.name) else: - raise RuntimeError("No .top file found, cannot continue.") + logger.error("No .top file found, cannot continue.\n") + raise RuntimeError write_mdp("%s.mdp" % self.name, gmx_opts, fin=self.mdp, defaults=self.gmx_defs) ## Call grompp followed by gmxdump to read the trajectory @@ -678,13 +681,15 @@ def links(self): if topfile != None: LinkFile(topfile, "%s.top" % self.name) else: - raise RuntimeError("No .top file found, cannot continue.") + logger.error("No .top file found, cannot continue.\n") + raise RuntimeError if not os.path.exists('%s.mdp' % self.name): mdpfile = onefile('mdp') if mdpfile != None: LinkFile(mdpfile, "%s.mdp" % self.name, nosrcok=True) else: - raise RuntimeError("No .mdp file found, cannot continue.") + logger.error("No .mdp file found, cannot continue.\n") + raise RuntimeError def callgmx(self, command, stdin=None, print_to_screen=False, print_command=False, **kwargs): @@ -738,7 +743,8 @@ def warngmx(self, command, warnings=[], maxwarn=1, **kwargs): elif 
fatal: for line in o: logger.error(line+'\n') - raise RuntimeError('grompp encountered a fatal error!') + logger.error('grompp encountered a fatal error!\n') + raise RuntimeError return o def energy_termnames(self, edrfile=None): @@ -748,7 +754,8 @@ def energy_termnames(self, edrfile=None): if edrfile == None: edrfile = "%s.edr" % self.name if not os.path.exists(edrfile): - raise RuntimeError('Cannot determine energy term names without an .edr file') + logger.error('Cannot determine energy term names without an .edr file\n') + raise RuntimeError ## Figure out which energy terms need to be printed. o = self.callgmx("g_energy -f %s -xvg no" % (edrfile), stdin="Total-Energy\n", copy_stdout=False, copy_stderr=True) parsemode = 0 @@ -1352,7 +1359,8 @@ def __init__(self,options,tgt_opts,forcefield): # Error checking. for i in self.nptfiles: if not os.path.exists(os.path.join(self.root, self.tgtdir, i)): - raise RuntimeError('Please provide %s; it is needed to proceed.' % i) + logger.error('Please provide %s; it is needed to proceed.\n' % i) + raise RuntimeError # Send back last frame of production trajectory. self.extra_output = ['liquid-md.gro'] # Send back the trajectory file. @@ -1402,7 +1410,8 @@ def __init__(self,options,tgt_opts,forcefield): # Error checking. for i in self.nptfiles: if not os.path.exists(os.path.join(self.root, self.tgtdir, i)): - raise RuntimeError('Please provide %s; it is needed to proceed.' % i) + logger.error('Please provide %s; it is needed to proceed.\n' % i) + raise RuntimeError # Send back last frame of production trajectory. self.extra_output = ['lipid-md.gro'] # Send back the trajectory file. 
diff --git a/src/interaction.py b/src/interaction.py index f82aed455..7ae3052db 100644 --- a/src/interaction.py +++ b/src/interaction.py @@ -56,12 +56,14 @@ def __init__(self,options,tgt_opts,forcefield): ## Set fragment 1 self.set_option(tgt_opts,'fragment1','fragment1') if len(self.fragment1) == 0: - raise Exception('You need to define the first fragment using the fragment1 keyword') + logger.error('You need to define the first fragment using the fragment1 keyword\n') + raise RuntimeError self.select1 = np.array(uncommadash(self.fragment1)) ## Set fragment 2 self.set_option(tgt_opts,'fragment2','fragment2') if len(self.fragment2) == 0: - raise Exception('You need to define the second fragment using the fragment2 keyword') + logger.error('You need to define the second fragment using the fragment2 keyword\n') + raise RuntimeError self.select2 = np.array(uncommadash(self.fragment2)) ## Set upper cutoff energy self.set_option(tgt_opts,'energy_upper','energy_upper') @@ -94,7 +96,8 @@ def __init__(self,options,tgt_opts,forcefield): if self.cauchy: self.divisor = np.sqrt(self.eqm**2 + denom**2) if self.attenuate: - raise Exception('attenuate and cauchy are mutually exclusive') + logger.error('attenuate and cauchy are mutually exclusive\n') + raise RuntimeError elif self.attenuate: # Attenuate only large repulsions. self.divisor = np.zeros(len(self.eqm)) diff --git a/src/lipid.py b/src/lipid.py index dc73bcacf..8a8f9bfee 100644 --- a/src/lipid.py +++ b/src/lipid.py @@ -102,7 +102,8 @@ def __init__(self,options,tgt_opts,forcefield): #======================================# # Read in lipid starting coordinates. 
if not os.path.exists(os.path.join(self.root, self.tgtdir, self.lipid_coords)): - raise RuntimeError("%s doesn't exist; please provide lipid_coords option" % self.lipid_coords) + logger.error("%s doesn't exist; please provide lipid_coords option\n" % self.lipid_coords) + raise RuntimeError self.lipid_mol = Molecule(os.path.join(self.root, self.tgtdir, self.lipid_coords)) # List of trajectory files that may be deleted if self.save_traj == 1. self.last_traj = [] @@ -174,11 +175,14 @@ def read_data(self): found_headings = True headings = line if len(set(headings)) != len(headings): - raise Exception('Column headings in data.csv must be unique') + logger.error('Column headings in data.csv must be unique\n') + raise RuntimeError if 'p' not in headings: - raise Exception('There must be a pressure column heading labeled by "p" in data.csv') + logger.error('There must be a pressure column heading labeled by "p" in data.csv\n') + raise RuntimeError if 't' not in headings: - raise Exception('There must be a temperature column heading labeled by "t" in data.csv') + logger.error('There must be a temperature column heading labeled by "t" in data.csv\n') + raise RuntimeError elif found_headings: try: # Temperatures are in kelvin. @@ -188,7 +192,8 @@ def read_data(self): punit = [val.split()[1] if len(val.split()) >= 1 else "atm" for head, val in zip(headings,line) if head == 'p'][0] unrec = set([punit]).difference(['atm','bar']) if len(unrec) > 0: - raise Exception('The pressure unit %s is not recognized, please use bar or atm' % unrec[0]) + logger.error('The pressure unit %s is not recognized, please use bar or atm\n' % unrec[0]) + raise RuntimeError # This line actually reads the reference data and inserts it into the RefData dictionary of dictionaries. 
for head, val in zip(headings,line): if head == 't' or head == 'p' : continue @@ -202,17 +207,20 @@ def read_data(self): self.RefData.setdefault(head,OrderedDict([]))[(t,pval,punit)] = np.array(map(float, val.split())) except: logger.error(line + '\n') - raise Exception('Encountered an error reading this line!') + logger.error('Encountered an error reading this line!\n') + raise RuntimeError else: logger.error(line + '\n') - raise Exception('I did not recognize this line!') + logger.error('I did not recognize this line!\n') + raise RuntimeError # Check the reference data table for validity. default_denoms = defaultdict(int) PhasePoints = None for head in self.RefData: if head not in known_vars+[i+"_wt" for i in known_vars]: # Only hard-coded properties may be recognized. - raise Exception("The column heading %s is not recognized in data.csv" % head) + logger.error("The column heading %s is not recognized in data.csv\n" % head) + raise RuntimeError if head in known_vars: if head+"_wt" not in self.RefData: # If the phase-point weights are not specified in the reference data file, initialize them all to one. 
@@ -500,7 +508,8 @@ def get(self, mvals, AGrad=True, AHess=True): # for obs in self.RefData: # del self.RefData[obs][PT] if len(Points) == 0: - raise Exception('The lipid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!') + logger.error('The lipid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!\n') + raise RuntimeError # Assign variable names to all the stuff in npt_result.p Rhos, Vols, Potentials, Energies, Dips, Grads, GDips, \ @@ -508,7 +517,8 @@ def get(self, mvals, AGrad=True, AHess=True): # Determine the number of molecules if len(set(NMols)) != 1: logger.error(str(NMols)) - raise Exception('The above list should only contain one number - the number of molecules') + logger.error('The above list should only contain one number - the number of molecules\n') + raise RuntimeError else: NMol = list(set(NMols))[0] diff --git a/src/liquid.py b/src/liquid.py index e276037d9..d394eb051 100644 --- a/src/liquid.py +++ b/src/liquid.py @@ -104,11 +104,13 @@ def __init__(self,options,tgt_opts,forcefield): #======================================# # Read in liquid starting coordinates. if not os.path.exists(os.path.join(self.root, self.tgtdir, self.liquid_coords)): - raise RuntimeError("%s doesn't exist; please provide liquid_coords option" % self.liquid_coords) + logger.error("%s doesn't exist; please provide liquid_coords option\n" % self.liquid_coords) + raise RuntimeError self.liquid_mol = Molecule(os.path.join(self.root, self.tgtdir, self.liquid_coords)) # Read in gas starting coordinates. 
if not os.path.exists(os.path.join(self.root, self.tgtdir, self.gas_coords)): - raise RuntimeError("%s doesn't exist; please provide gas_coords option" % self.gas_coords) + logger.error("%s doesn't exist; please provide gas_coords option\n" % self.gas_coords) + raise RuntimeError self.gas_mol = Molecule(os.path.join(self.root, self.tgtdir, self.gas_coords)) # List of trajectory files that may be deleted if self.save_traj == 1. self.last_traj = [] @@ -181,11 +183,14 @@ def read_data(self): found_headings = True headings = line if len(set(headings)) != len(headings): - raise Exception('Column headings in data.csv must be unique') + logger.error('Column headings in data.csv must be unique\n') + raise RuntimeError if 'p' not in headings: - raise Exception('There must be a pressure column heading labeled by "p" in data.csv') + logger.error('There must be a pressure column heading labeled by "p" in data.csv\n') + raise RuntimeError if 't' not in headings: - raise Exception('There must be a temperature column heading labeled by "t" in data.csv') + logger.error('There must be a temperature column heading labeled by "t" in data.csv\n') + raise RuntimeError elif found_headings: try: # Temperatures are in kelvin. @@ -195,7 +200,8 @@ def read_data(self): punit = [val.split()[1] if len(val.split()) >= 1 else "atm" for head, val in zip(headings,line) if head == 'p'][0] unrec = set([punit]).difference(['atm','bar']) if len(unrec) > 0: - raise Exception('The pressure unit %s is not recognized, please use bar or atm' % unrec[0]) + logger.error('The pressure unit %s is not recognized, please use bar or atm\n' % unrec[0]) + raise RuntimeError # This line actually reads the reference data and inserts it into the RefData dictionary of dictionaries. 
for head, val in zip(headings,line): if head == 't' or head == 'p' : continue @@ -207,17 +213,20 @@ def read_data(self): self.RefData.setdefault(head,OrderedDict([]))[(t,pval,punit)] = False except: logger.error(line + '\n') - raise Exception('Encountered an error reading this line!') + logger.error('Encountered an error reading this line!\n') + raise RuntimeError else: logger.error(line + '\n') - raise Exception('I did not recognize this line!') + logger.error('I did not recognize this line!\n') + raise RuntimeError # Check the reference data table for validity. default_denoms = defaultdict(int) PhasePoints = None for head in self.RefData: if head not in known_vars+[i+"_wt" for i in known_vars]: # Only hard-coded properties may be recognized. - raise Exception("The column heading %s is not recognized in data.csv" % head) + logger.error("The column heading %s is not recognized in data.csv\n" % head) + raise RuntimeError if head in known_vars: if head+"_wt" not in self.RefData: # If the phase-point weights are not specified in the reference data file, initialize them all to one. 
@@ -470,7 +479,8 @@ def read(self, mvals, AGrad=True, AHess=True): logger.warning('The file ./%s/npt_result.p does not exist so we cannot read it\n' % label) pass if len(Points) == 0: - raise Exception('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!') + logger.error('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!\n') + raise RuntimeError # Assign variable names to all the stuff in npt_result.p Rhos, Vols, Potentials, Energies, Dips, Grads, GDips, mPotentials, mEnergies, mGrads, \ @@ -478,7 +488,8 @@ def read(self, mvals, AGrad=True, AHess=True): # Determine the number of molecules if len(set(NMols)) != 1: logger.error(str(NMols)) - raise Exception('The above list should only contain one number - the number of molecules') + logger.error('The above list should only contain one number - the number of molecules\n') + raise RuntimeError else: NMol = list(set(NMols))[0] @@ -581,7 +592,8 @@ def get(self, mvals, AGrad=True, AHess=True): logger.warning('The file ./%s/npt_result.p does not exist so we cannot read it\n' % label) pass if len(Points) == 0: - raise Exception('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!') + logger.error('The liquid simulations have terminated with \x1b[1;91mno readable data\x1b[0m - this is a problem!\n') + raise RuntimeError # Assign variable names to all the stuff in npt_result.p Rhos, Vols, Potentials, Energies, Dips, Grads, GDips, mPotentials, mEnergies, mGrads, \ @@ -589,7 +601,8 @@ def get(self, mvals, AGrad=True, AHess=True): # Determine the number of molecules if len(set(NMols)) != 1: logger.error(str(NMols)) - raise Exception('The above list should only contain one number - the number of molecules') + logger.error('The above list should only contain one number - the number of molecules\n') + raise RuntimeError else: NMol = list(set(NMols))[0] @@ -627,7 +640,8 @@ def get(self, 
mvals, AGrad=True, AHess=True): "Increasing simulation length: %i -> %i steps" % \ (Nrpt, self.liquid_md_steps, sumsteps), color=6) if self.liquid_md_steps * 2 != sumsteps: - raise RuntimeError("Spoo!") + logger.error("Spoo!\n") + raise RuntimeError self.liquid_eq_steps *= 2 self.liquid_md_steps *= 2 self.gas_eq_steps *= 2 @@ -787,17 +801,20 @@ def deprod(vec): Hvap_grad[PT] -= GEPol if hasattr(self,'use_cni') and self.use_cni: if not ('cni' in self.RefData and self.RefData['cni'][PT]): - raise RuntimeError('Asked for a nonideality correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.') + logger.error('Asked for a nonideality correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.\n') + raise RuntimeError logger.debug("Adding % .3f to enthalpy of vaporization at " % self.RefData['cni'][PT] + str(PT) + '\n') Hvap_calc[PT] += self.RefData['cni'][PT] if hasattr(self,'use_cvib_intra') and self.use_cvib_intra: if not ('cvib_intra' in self.RefData and self.RefData['cvib_intra'][PT]): - raise RuntimeError('Asked for a quantum intramolecular vibrational correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.') + logger.error('Asked for a quantum intramolecular vibrational correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.\n') + raise RuntimeError logger.debug("Adding % .3f to enthalpy of vaporization at " % self.RefData['cvib_intra'][PT] + str(PT) + '\n') Hvap_calc[PT] += self.RefData['cvib_intra'][PT] if hasattr(self,'use_cvib_inter') and self.use_cvib_inter: if not ('cvib_inter' in self.RefData and self.RefData['cvib_inter'][PT]): - raise RuntimeError('Asked for a quantum intermolecular vibrational correction but not provided in reference data (data.csv). 
Either disable the option in data.csv or add data.') + logger.error('Asked for a quantum intermolecular vibrational correction but not provided in reference data (data.csv). Either disable the option in data.csv or add data.\n') + raise RuntimeError logger.debug("Adding % .3f to enthalpy of vaporization at " % self.RefData['cvib_inter'][PT] + str(PT) + '\n') Hvap_calc[PT] += self.RefData['cvib_inter'][PT] else: diff --git a/src/molecule.py b/src/molecule.py index 99b87e70a..2f3ccbd34 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -246,9 +246,11 @@ def unmangle(M1, M2): M.elem = list(np.array(PDB.elem)[unmangled]) """ if len(M1) != 1 or len(M2) != 1: - raise RuntimeError("Unmangler only deals with length-1 molecule objects") + logger.error("Unmangler only deals with length-1 molecule objects\n") + raise RuntimeError if M1.na != M2.na: - raise RuntimeError("Unmangler only deals with same number of atoms") + logger.error("Unmangler only deals with same number of atoms\n") + raise RuntimeError unmangler = {} for i in range(M1.na): for j in range(M2.na): @@ -256,7 +258,8 @@ def unmangle(M1, M2): unmangler[j] = i unmangled = [unmangler[i] for i in sorted(unmangler.keys())] if len(unmangled) != M1.na: - raise RuntimeError("Unmangler failed (different structures?)") + logger.error("Unmangler failed (different structures?)\n") + raise RuntimeError return unmangled def nodematch(node1,node2): @@ -720,7 +723,7 @@ def __getattr__(self, key): return len(self.xyzs[0]) else: return 0 - #raise Exception('na is ill-defined if the molecule has no AtomKeys member variables.') + #raise RuntimeError('na is ill-defined if the molecule has no AtomKeys member variables.') ## These attributes return a list of attribute names defined in this class that belong in the chosen category. 
## For example: self.FrameKeys should return set(['xyzs','boxes']) if xyzs and boxes exist in self.Data elif key == 'FrameKeys': @@ -761,7 +764,8 @@ def __getitem__(self, key): New.Data[k] = copy.deepcopy(self.Data[k]) return New else: - raise Exception('getitem is not implemented for keys of type %s' % str(key)) + logger.error('getitem is not implemented for keys of type %s\n' % str(key)) + raise RuntimeError def __delitem__(self, key): """ @@ -788,7 +792,8 @@ def __add__(self,other): """ Add method for Molecule objects. """ # Check type of other if not isinstance(other,Molecule): - raise TypeError('A Molecule instance can only be added to another Molecule instance') + logger.error('A Molecule instance can only be added to another Molecule instance\n') + raise TypeError # Create the sum of the two classes by copying the first class. Sum = Molecule() for key in AtomVariableNames | MetaVariableNames: @@ -799,7 +804,8 @@ def __add__(self,other): elif diff(self, other, key): for i, j in zip(self.Data[key], other.Data[key]): print i, j, i==j - raise Exception('The data member called %s is not the same for these two objects' % key) + logger.error('The data member called %s is not the same for these two objects\n' % key) + raise RuntimeError elif key in self.Data: Sum.Data[key] = copy.deepcopy(self.Data[key]) elif key in other.Data: @@ -807,9 +813,11 @@ def __add__(self,other): for key in FrameVariableNames: if both(self, other, key): if type(self.Data[key]) is not list: - raise Exception('Key %s in self is a FrameKey, it must be a list' % key) + logger.error('Key %s in self is a FrameKey, it must be a list\n' % key) + raise RuntimeError if type(other.Data[key]) is not list: - raise Exception('Key %s in other is a FrameKey, it must be a list' % key) + logger.error('Key %s in other is a FrameKey, it must be a list\n' % key) + raise RuntimeError Sum.Data[key] = list(self.Data[key] + other.Data[key]) elif either(self, other, key): # TINKER 6.3 compatibility - catch the 
specific case that one has a periodic box and the other doesn't. @@ -819,21 +827,24 @@ def __add__(self,other): elif key in other.Data: self.Data['boxes'] = [other.Data['boxes'][0] for i in range(len(self))] else: - raise Exception('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).' % key) + logger.error('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key) + raise RuntimeError return Sum def __iadd__(self,other): """ Add method for Molecule objects. """ # Check type of other if not isinstance(other,Molecule): - raise TypeError('A Molecule instance can only be added to another Molecule instance') + logger.error('A Molecule instance can only be added to another Molecule instance\n') + raise TypeError # Create the sum of the two classes by copying the first class. for key in AtomVariableNames | MetaVariableNames: if key in ['fnm', 'ftype', 'bonds']: pass elif diff(self, other, key): for i, j in zip(self.Data[key], other.Data[key]): print i, j, i==j - raise Exception('The data member called %s is not the same for these two objects' % key) + logger.error('The data member called %s is not the same for these two objects\n' % key) + raise RuntimeError # Information from the other class is added to this class (if said info doesn't exist.) 
elif key in other.Data: self.Data[key] = copy.deepcopy(other.Data[key]) @@ -841,9 +852,11 @@ def __iadd__(self,other): for key in FrameVariableNames: if both(self, other, key): if type(self.Data[key]) is not list: - raise Exception('Key %s in self is a FrameKey, it must be a list' % key) + logger.error('Key %s in self is a FrameKey, it must be a list\n' % key) + raise RuntimeError if type(other.Data[key]) is not list: - raise Exception('Key %s in other is a FrameKey, it must be a list' % key) + logger.error('Key %s in other is a FrameKey, it must be a list\n' % key) + raise RuntimeError self.Data[key] += other.Data[key] elif either(self, other, key): # TINKER 6.3 compatibility - catch the specific case that one has a periodic box and the other doesn't. @@ -853,7 +866,8 @@ def __iadd__(self,other): elif key in other.Data: self.Data['boxes'] = [other.Data['boxes'][0] for i in range(len(self))] else: - raise Exception('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).' % key) + logger.error('Key %s is a FrameKey, must exist in both self and other for them to be added (for now).\n' % key) + raise RuntimeError return self def repair(self, key, klast): @@ -868,9 +882,10 @@ def repair(self, key, klast): # If we only have one box then we can fill in the rest of the trajectory. for i in range(diff): self.Data['boxes'].append(self.Data['boxes'][-1]) else: - raise Exception('The keys %s and %s have different lengths (%i %i)' - '- this isn\'t supposed to happen for two AtomKeys member variables.' \ - % (key, klast, len(self.Data[key]), len(self.Data[klast]))) + logger.error('The keys %s and %s have different lengths (%i %i)' + '- this isn\'t supposed to happen for two AtomKeys member variables.' 
+ % (key, klast, len(self.Data[key]), len(self.Data[klast]))) + raise RuntimeError def reorder_according_to(self, other): @@ -966,7 +981,8 @@ def __init__(self, fnm = None, ftype = None, positive_resid=True, build_topology ## Try to determine from the file name using the extension. ftype = os.path.splitext(fnm)[1][1:] if not os.path.exists(fnm): - raise IOError('Tried to create Molecule object from a file that does not exist: %s' % fnm) + logger.error('Tried to create Molecule object from a file that does not exist: %s\n' % fnm) + raise IOError self.Data['ftype'] = ftype ## Actually read the file. Parsed = self.Read_Tab[self.Funnel[ftype.lower()]](fnm, **kwargs) @@ -1000,7 +1016,8 @@ def __init__(self, fnm = None, ftype = None, positive_resid=True, build_topology def require(self, *args): for arg in args: if arg not in self.Data: - raise Exception("%s is a required attribute for writing this type of file but it's not present" % arg) + logger.error("%s is a required attribute for writing this type of file but it's not present\n" % arg) + raise RuntimeError # def read(self, fnm, ftype = None): # """ Read in a file. """ @@ -1014,7 +1031,8 @@ def require(self, *args): def write(self,fnm=None,ftype=None,append=False,select=None,**kwargs): if fnm == None and ftype == None: - raise Exception("Output file name and file type are not specified.") + logger.error("Output file name and file type are not specified.\n") + raise RuntimeError elif ftype == None: ftype = os.path.splitext(fnm)[1][1:] ## Fill in comments. 
@@ -1113,7 +1131,8 @@ def rigid_water(self): def load_frames(self, fnm): NewMol = Molecule(fnm) if NewMol.na != self.na: - raise Exception('When loading frames, don\'t change the number of atoms.') + logger.error('When loading frames, don\'t change the number of atoms.\n') + raise RuntimeError for key in NewMol.FrameKeys: self.Data[key] = NewMol.Data[key] @@ -1140,7 +1159,8 @@ def add_quantum(self, other): OtherMol = Molecule(other) for key in OtherMol.QuantumKeys: if key in AtomVariableNames and len(OtherMol.Data[key]) != self.na: - raise Exception('The quantum-key %s is AtomData, but it doesn\'t have the same number of atoms as the Molecule object we\'re adding it to.') + logger.error('The quantum-key %s is AtomData, but it doesn\'t have the same number of atoms as the Molecule object we\'re adding it to.') + raise RuntimeError self.Data[key] = copy.deepcopy(OtherMol.Data[key]) def add_virtual_site(self, idx, **kwargs): @@ -1149,7 +1169,8 @@ def add_virtual_site(self, idx, **kwargs): if key in kwargs: self.Data[key].insert(idx,kwargs[key]) else: - raise Exception('You need to specify %s when adding a virtual site to this molecule.' % key) + logger.error('You need to specify %s when adding a virtual site to this molecule.\n' % key) + raise RuntimeError if 'xyzs' in self.Data: for i, xyz in enumerate(self.xyzs): if 'pos' in kwargs: @@ -1157,7 +1178,8 @@ def add_virtual_site(self, idx, **kwargs): else: self.xyzs[i] = np.insert(xyz, idx, 0.0, axis=0) else: - raise Exception('You need to have xyzs in this molecule to add a virtual site.') + logger.error('You need to have xyzs in this molecule to add a virtual site.\n') + raise RuntimeError def replace_peratom(self, key, orig, want): """ Replace all of the data for a certain attribute in the system from orig to want. """ @@ -1166,7 +1188,8 @@ def replace_peratom(self, key, orig, want): if self.Data[key][i] == orig: self.Data[key][i] = want else: - raise Exception('The key that we want to replace (%s) doesn\'t exist.' 
% key) + logger.error('The key that we want to replace (%s) doesn\'t exist.\n' % key) + raise RuntimeError def replace_peratom_conditional(self, key1, cond, key2, orig, want): """ Replace all of the data for a attribute key2 from orig to want, contingent on key1 being equal to cond. @@ -1176,7 +1199,8 @@ def replace_peratom_conditional(self, key1, cond, key2, orig, want): if self.Data[key2][i] == orig and self.Data[key1][i] == cond: self.Data[key2][i] = want else: - raise Exception('Either the comparison or replacement key (%s, %s) doesn\'t exist.' % (key1, key2)) + logger.error('Either the comparison or replacement key (%s, %s) doesn\'t exist.\n' % (key1, key2)) + raise RuntimeError def atom_select(self,atomslice): """ Return a copy of the object with certain atoms selected. Takes an integer, list or array as argument. """ @@ -1219,7 +1243,8 @@ def atom_select(self,atomslice): def atom_stack(self, other): """ Return a copy of the object with another molecule object appended. WARNING: This function may invalidate stuff like QM energies. """ if len(other) != len(self): - raise Exception('The number of frames of the Molecule objects being stacked are not equal.') + logger.error('The number of frames of the Molecule objects being stacked are not equal.\n') + raise RuntimeError New = Molecule() for key in self.FrameKeys | self.MetaKeys: @@ -1235,7 +1260,8 @@ def FrameStack(k): # Now build the new atom keys. 
for key in self.AtomKeys: if key not in other.Data: - raise Exception('Trying to stack two Molecule objects - the first object contains %s and the other does not' % (key)) + logger.error('Trying to stack two Molecule objects - the first object contains %s and the other does not\n' % (key)) + raise RuntimeError if key == 'tinkersuf': # Tinker suffix is a bit tricky NewSuf = [] for line in other.Data[key]: @@ -1252,7 +1278,8 @@ def FrameStack(k): elif type(self.Data[key]) is list: New.Data[key] = self.Data[key] + other.Data[key] else: - raise Exception('Cannot stack %s because it is of type %s' % (key, str(type(New.Data[key])))) + logger.error('Cannot stack %s because it is of type %s\n' % (key, str(type(New.Data[key])))) + raise RuntimeError if 'bonds' in self.Data and 'bonds' in other.Data: New.Data['bonds'] = self.bonds + [(b[0]+self.na, b[1]+self.na) for b in other.bonds] return New @@ -1285,7 +1312,8 @@ def align(self, smooth = False, center = True, center_mass = False, select=None) if isinstance(select, list): select = np.array(select) if center and center_mass: - raise Exception('Specify center=True or center_mass=True but set the other one to False') + logger.error('Specify center=True or center_mass=True but set the other one to False\n') + raise RuntimeError coms = self.center_of_mass() xyz1 = self.xyzs[0] @@ -1461,7 +1489,8 @@ def find_angles(self): dipeptide when comparing to TINKER's analyze program. """ if not hasattr(self, 'topology'): - raise RuntimeError("Need to have built a topology to find angles") + logger.error("Need to have built a topology to find angles\n") + raise RuntimeError angidx = [] # Iterate over separate molecules @@ -1486,7 +1515,8 @@ def find_dihedrals(self): program. 
""" if not hasattr(self, 'topology'): - raise RuntimeError("Need to have built a topology to find dihedrals") + logger.error("Need to have built a topology to find dihedrals\n") + raise RuntimeError dihidx = [] # Iterate over separate molecules @@ -1806,7 +1836,8 @@ def read_dcd(self, fnm, **kwargs): xyzs = [] boxes = [] if _dcdlib.vmdplugin_init() != 0: - raise IOError("Unable to init DCD plugin") + logger.error("Unable to init DCD plugin\n") + raise IOError natoms = c_int(-1) frame = 0 dcd = _dcdlib.open_dcd_read(fnm, "dcd", byref(natoms)) @@ -2393,7 +2424,8 @@ def read_qcout(self, fnm, errok = [], **kwargs): Answer['qcerr'] = line.strip() fatal = 0 else: - raise Exception('Calculation encountered a fatal error! (%s)' % line) + logger.error('Calculation encountered a fatal error! (%s)\n' % line) + raise RuntimeError if 'Q-Chem fatal error' in line: fatal = 1 if XMode >= 1: @@ -2407,7 +2439,8 @@ def read_qcout(self, fnm, errok = [], **kwargs): if elem == []: elem = elemThis elif elem != elemThis: - raise Exception('Q-Chem output parser will not work if successive calculations have different numbers of atoms!') + logger.error('Q-Chem output parser will not work if successive calculations have different numbers of atoms!\n') + raise RuntimeError elemThis = [] xyzs.append(np.array(xyz)) xyz = [] @@ -2520,7 +2553,7 @@ def read_qcout(self, fnm, errok = [], **kwargs): if len(Mats['hessian_scf']['All']) > 0: Answer['qm_hessians'] = Mats['hessian_scf']['All'] #else: - # raise Exception('There are no forces in %s' % fnm) + # raise RuntimeError('There are no forces in %s' % fnm) # Also work our way down with the energies. 
if len(Floats['energy_ccsdt']) > 0: Answer['qm_energies'] = Floats['energy_ccsdt'] @@ -2530,20 +2563,24 @@ def read_qcout(self, fnm, errok = [], **kwargs): Answer['qm_energies'] = Floats['energy_mp2'] elif len(energy_scf) > 0: if 'correlation' in Answer['qcrems'][0] and Answer['qcrems'][0]['correlation'].lower() in ['mp2', 'rimp2', 'ccsd', 'ccsd(t)']: - raise Exception("Q-Chem was called with a post-HF theory but we only got the SCF energy") + logger.error("Q-Chem was called with a post-HF theory but we only got the SCF energy\n") + raise RuntimeError Answer['qm_energies'] = energy_scf elif 'SCF failed to converge' not in errok: - raise Exception('There are no energies in %s' % fnm) + logger.error('There are no energies in %s\n' % fnm) + raise RuntimeError #### Sanity checks # We currently don't have a graceful way of dealing with SCF convergence failures in the output file. # For instance, a failed calculation will have elem / xyz but no forces. :/ if 0 in conv and 'SCF failed to converge' not in errok: - raise Exception('SCF convergence failure encountered in parsing %s' % fnm) + logger.error('SCF convergence failure encountered in parsing %s\n' % fnm) + raise RuntimeError elif (0 not in conv): # The molecule should have only one charge and one multiplicity if len(set(Floats['charge'])) != 1 or len(set(Floats['mult'])) != 1: - raise Exception('Unexpected number of charges or multiplicities in parsing %s' % fnm) + logger.error('Unexpected number of charges or multiplicities in parsing %s\n' % fnm) + raise RuntimeError # If we have any QM energies (not the case if SCF convergence failure) if 'qm_energies' in Answer: @@ -2558,11 +2595,13 @@ def read_qcout(self, fnm, errok = [], **kwargs): mkspn.append([0.0 for j in mkchg[-1]]) lens = [len(i) for i in Answer['qm_energies'], Answer['xyzs']] if len(set(lens)) != 1: - raise Exception('The number of energies and coordinates in %s are not the same : %s' % (fnm, str(lens))) + logger.error('The number of energies and 
coordinates in %s are not the same : %s\n' % (fnm, str(lens))) + raise RuntimeError # The number of atoms should all be the same if len(set([len(i) for i in Answer['xyzs']])) > 1: - raise Exception('The numbers of atoms across frames in %s are not all the same' % (fnm)) + logger.error('The numbers of atoms across frames in %s are not all the same\n' % (fnm)) + raise RuntimeError if 'qm_forces' in Answer: for i, frc in enumerate(Answer['qm_forces']): @@ -2614,7 +2653,8 @@ def write_qcin(self, select, **kwargs): if 'jobtype' in self.qcrems[remidx] and self.qcrems[remidx]['jobtype'].lower() == 'fsm': fsm = True if len(select) != 2: - raise RuntimeError('For freezing string method, please provide two structures only.') + logger.error('For freezing string method, please provide two structures only.\n') + raise RuntimeError if SectName != '@@@@': out.append('$%s' % SectName) for line in SectData: @@ -2743,7 +2783,8 @@ def write_gro(self, select, **kwargs): def write_dcd(self, select, **kwargs): if _dcdlib.vmdplugin_init() != 0: - raise IOError("Unable to init DCD plugin") + logger.error("Unable to init DCD plugin\n") + raise IOError natoms = c_int(self.na) dcd = _dcdlib.open_dcd_write(self.fout, "dcd", natoms) ts = MolfileTimestep() @@ -2756,7 +2797,8 @@ def write_dcd(self, select, **kwargs): ts.C = self.boxes[I].c if 'boxes' in self.Data else 1.0 result = _dcdlib.write_timestep(dcd, byref(ts)) if result != 0: - raise IOError("Error encountered when writing DCD") + logger.error("Error encountered when writing DCD\n") + raise IOError ## Close the DCD file _dcdlib.close_file_write(dcd) dcd = None @@ -2968,7 +3010,8 @@ def buildbox(line): v3 = np.array([s[7], s[8], s[2]]) return BuildLatticeFromVectors(v1, v2, v3) else: - raise Exception("Not sure what to do since you gave me %i numbers" % len(s)) + logger.error("Not sure what to do since you gave me %i numbers\n" % len(s)) + raise RuntimeError if 'boxes' not in self.Data or len(self.boxes) != self.ns: 
sys.stderr.write("Please specify the periodic box using:\n") @@ -2981,7 +3024,8 @@ def buildbox(line): if os.path.exists(boxstr): boxfile = open(boxstr).readlines() if len(boxfile) != len(self): - raise Exception('Tried to read in the box file, but it has a different length from the number of frames.') + logger.error('Tried to read in the box file, but it has a different length from the number of frames.\n') + raise RuntimeError else: self.boxes = [buildbox(line) for line in boxfile] else: diff --git a/src/moments.py b/src/moments.py index 924d41f31..c249a0ddf 100644 --- a/src/moments.py +++ b/src/moments.py @@ -119,7 +119,8 @@ def read_reference_data(self): self.ref_moments['polarizability']['zz'] = float(s[2]) else: logger.info("%s\n" % line) - raise Exception("This line doesn't comply with our multipole file format!") + logger.error("This line doesn't comply with our multipole file format!\n") + raise RuntimeError ln += 1 # Subtract the trace of the quadrupole moment. if 'quadrupole' in self.ref_moments: diff --git a/src/nifty.py b/src/nifty.py index 68b944e8d..3fcc29d3b 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -124,7 +124,8 @@ def uncommadash(s): logger.warning("List is out of order\n") raise except: - raise Exception('Invalid string for converting to list of numbers: %s' % s) + logger.error('Invalid string for converting to list of numbers: %s\n' % s) + raise RuntimeError return L def extract_int(arr, avgthre, limthre, label="value", verbose=True): @@ -477,7 +478,8 @@ def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True): N = A_n.shape[0] # Be sure A_n and B_n have the same dimensions. if(A_n.shape != B_n.shape): - raise ParameterError('A_n and B_n must have same dimensions.') + logger.error('A_n and B_n must have same dimensions.\n') + raise ParameterError # Initialize statistical inefficiency estimate with uncorrelated value. g = 1.0 # Compute mean of each timeseries. 
@@ -574,11 +576,13 @@ def load_etree(self): for q in "\"'": # double or single quote if rep.startswith(q): if not rep.endswith(q): - raise ValueError, "insecure string pickle" + logger.error("insecure string pickle\n") + raise ValueError rep = rep[len(q):-len(q)] break else: - raise ValueError, "insecure string pickle" + logger.error("insecure string pickle\n") + raise ValueError ## The string is converted to an _ElementTree type before it is finally loaded. self.append(etree.ElementTree(etree.fromstring(rep.decode("string-escape")))) except: @@ -817,7 +821,9 @@ def onefile(ext, arg=None): def GoInto(Dir): if os.path.exists(Dir): if os.path.isdir(Dir): pass - else: raise Exception("Tried to create directory %s, it exists but isn't a directory" % newdir) + else: + logger.error("Tried to create directory %s, it exists but isn't a directory\n" % newdir) + raise RuntimeError else: os.makedirs(Dir) os.chdir(Dir) @@ -830,7 +836,8 @@ def allsplit(Dir): def Leave(Dir): if os.path.split(os.getcwd())[1] != Dir: - raise Exception("Trying to leave directory %s, but we're actually in directory %s (check your code)" % (Dir,os.path.split(os.getcwd())[1])) + logger.error("Trying to leave directory %s, but we're actually in directory %s (check your code)\n" % (Dir,os.path.split(os.getcwd())[1])) + raise RuntimeError for i in range(len(allsplit(Dir))): os.chdir('..') @@ -871,23 +878,28 @@ def LinkFile(src, dest, nosrcok = False): if os.path.exists(src): if os.path.exists(dest): if os.path.islink(dest): pass - else: raise Exception("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link" % (src, dest)) + else: + logger.error("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link\n" % (src, dest)) + raise RuntimeError else: os.symlink(src, dest) else: if not nosrcok: - raise Exception("Tried to create symbolic link %s to %s, but source file doesn't exist%s" % (src,dest,MissingFileInspection(src))) + logger.error("Tried to 
create symbolic link %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src))) + raise RuntimeError def CopyFile(src, dest): if os.path.exists(src): if os.path.exists(dest): if os.path.islink(dest): - raise Exception("Tried to copy %s to %s, destination exists but it's a symbolic link" % (src, dest)) + logger.error("Tried to copy %s to %s, destination exists but it's a symbolic link\n" % (src, dest)) + raise RuntimeError else: shutil.copy2(src, dest) else: - raise Exception("Tried to copy %s to %s, but source file doesn't exist%s" % (src,dest,MissingFileInspection(src))) + logger.error("Tried to copy %s to %s, but source file doesn't exist%s\n" % (src,dest,MissingFileInspection(src))) + raise RuntimeError def link_dir_contents(abssrcdir, absdestdir): for fnm in os.listdir(abssrcdir): @@ -1050,7 +1062,8 @@ def process_err(read): # This code (commented out) would not throw an exception, but instead exit with the returncode of the crashed program. # sys.stderr.write("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode)) # sys.exit(p.returncode) - raise Exception("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode)) + logger.error("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n\n" % (command, p.returncode)) + raise RuntimeError # Return the output in the form of a list of lines, so we can loop over it using "for line in output". Out = process_out.stdout.split('\n') diff --git a/src/objective.py b/src/objective.py index e68373734..dd04b9242 100644 --- a/src/objective.py +++ b/src/objective.py @@ -129,13 +129,15 @@ def __init__(self, options, tgt_opts, forcefield): self.Targets = [] for opts in tgt_opts: if opts['type'] not in Implemented_Targets: - raise RuntimeError('The target type \x1b[1;91m%s\x1b[0m is not implemented!' 
% opts['type']) + logger.error('The target type \x1b[1;91m%s\x1b[0m is not implemented!\n' % opts['type']) + raise RuntimeError if opts["remote"]: Tgt = forcebalance.target.RemoteTarget(options, opts, forcefield) else: Tgt = Implemented_Targets[opts['type']](options,opts,forcefield) self.Targets.append(Tgt) printcool_dictionary(Tgt.PrintOptionDict,"Setup for target %s :" % Tgt.name) if len(set([Tgt.name for Tgt in self.Targets])) != len([Tgt.name for Tgt in self.Targets]): - raise Exception("The list of target names is not unique!") + logger.error("The list of target names is not unique!\n") + raise RuntimeError ## The force field (it seems to be everywhere) self.FF = forcefield ## Initialize the penalty function. diff --git a/src/openmmio.py b/src/openmmio.py index d9937d3ec..2d501f145 100644 --- a/src/openmmio.py +++ b/src/openmmio.py @@ -478,7 +478,8 @@ def setopts(self, platname="CUDA", precision="single", **kwargs): self.precision = self.precision.lower() valprecs = ['single','mixed','double'] if self.precision not in valprecs: - raise RuntimeError("Please specify one of %s for precision" % valprecs) + logger.error("Please specify one of %s for precision\n" % valprecs) + raise RuntimeError ## Set the simulation platform if self.verbose: logger.info("Setting Platform to %s\n" % self.platname) self.platform = Platform.getPlatformByName(self.platname) @@ -508,14 +509,17 @@ def readsrc(self, **kwargs): if 'pdb' in kwargs and os.path.exists(kwargs['pdb']): # Case 1. 
The PDB file name is provided explicitly pdbfnm = kwargs['pdb'] - if not os.path.exists(pdbfnm): raise RuntimeError("%s specified but doesn't exist" % pdbfnm) + if not os.path.exists(pdbfnm): + logger.error("%s specified but doesn't exist\n" % pdbfnm) + raise RuntimeError if 'mol' in kwargs: self.mol = kwargs['mol'] elif 'coords' in kwargs: self.mol = Molecule(kwargs['coords']) else: - raise RuntimeError('Must provide either a molecule object or coordinate file.') + logger.error('Must provide either a molecule object or coordinate file.\n') + raise RuntimeError if pdbfnm != None: mpdb = Molecule(pdbfnm) @@ -543,7 +547,8 @@ def prepare(self, pbc=False, mmopts={}, **kwargs): else: if 'ffxml' in kwargs: if not os.path.exists(kwargs['ffxml']): - raise RuntimeError("%s doesn't exist" % kwargs['ffxml']) + logger.error("%s doesn't exist\n" % kwargs['ffxml']) + raise RuntimeError self.ffxml = kwargs['ffxml'] elif onefile('xml'): self.ffxml = onefile('xml') @@ -559,7 +564,8 @@ def prepare(self, pbc=False, mmopts={}, **kwargs): if hasattr(self,'FF'): if self.AMOEBA: if self.FF.amoeba_pol == None: - raise RuntimeError('You must specify amoeba_pol if there are any AMOEBA forces.') + logger.error('You must specify amoeba_pol if there are any AMOEBA forces.\n') + raise RuntimeError if self.FF.amoeba_pol == 'mutual': self.mmopts['polarization'] = 'mutual' self.mmopts.setdefault('mutualInducedTargetEpsilon', self.FF.amoeba_eps if self.FF.amoeba_eps != None else 1e-6) @@ -597,7 +603,8 @@ def prepare(self, pbc=False, mmopts={}, **kwargs): if self.pbc: # Obtain the periodic box if self.mol.boxes[I].alpha != 90.0 or self.mol.boxes[I].beta != 90.0 or self.mol.boxes[I].gamma != 90.0: - raise RuntimeError('OpenMM cannot handle nonorthogonal boxes.') + logger.error('OpenMM cannot handle nonorthogonal boxes.\n') + raise RuntimeError box_omm = [Vec3(self.mol.boxes[I].a, 0, 0)*angstrom, Vec3(0, self.mol.boxes[I].b, 0)*angstrom, Vec3(0, 0, self.mol.boxes[I].c)*angstrom] @@ -636,7 +643,8 @@ 
def create_simulation(self, timestep=1.0, faststep=0.25, temperature=None, press ## If temperature control is turned on, then run Langevin dynamics. if mts: if rpmd_beads > 0: - raise RuntimeError("No multiple timestep integrator without temperature control.") + logger.error("No multiple timestep integrator without temperature control.\n") + raise RuntimeError integrator = MTSVVVRIntegrator(temperature*kelvin, collision/picosecond, timestep*femtosecond, self.system, ninnersteps=int(timestep/faststep)) else: @@ -649,7 +657,8 @@ def create_simulation(self, timestep=1.0, faststep=0.25, temperature=None, press else: ## If no temperature control, default to the Verlet integrator. if rpmd_beads > 0: - raise RuntimeError("No RPMD integrator without temperature control.") + logger.error("No RPMD integrator without temperature control.\n") + raise RuntimeError if mts: warn_once("No multiple timestep integrator without temperature control.") integrator = VerletIntegrator(timestep*femtoseconds) @@ -827,7 +836,8 @@ def energy_dipole(self): return np.hstack((Result["Energy"].reshape(-1,1), Result["Dipole"])) def normal_modes(self, shot=0, optimize=True): - raise NotImplementedError("OpenMM cannot do normal mode analysis") + logger.error("OpenMM cannot do normal mode analysis\n") + raise NotImplementedError def optimize(self, shot=0, crit=1e-4): @@ -865,7 +875,8 @@ def multipole_moments(self, shot=0, optimize=True, polarizability=False): self.update_simulation() if polarizability: - raise NotImplementedError("Polarizability calculation is available in TINKER only.") + logger.error("Polarizability calculation is available in TINKER only.\n") + raise NotImplementedError if optimize: self.optimize(shot) else: self.set_positions(shot) @@ -906,7 +917,8 @@ def interaction_energy(self, fraga, fragb): self.update_simulation() if self.name == 'A' or self.name == 'B': - raise RuntimeError("Don't name the engine A or B!") + logger.error("Don't name the engine A or B!\n") + raise 
RuntimeError # Create two subengines. if hasattr(self,'target'): @@ -953,11 +965,13 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, """ if float(int(float(nequil)/float(nsave))) != float(nequil)/float(nsave): - raise RuntimeError("Please set nequil to an integer multiple of nsave") + logger.error("Please set nequil to an integer multiple of nsave\n") + raise RuntimeError iequil = nequil/nsave if float(int(float(nsteps)/float(nsave))) != float(nsteps)/float(nsave): - raise RuntimeError("Please set nsteps to an integer multiple of nsave") + logger.error("Please set nsteps to an integer multiple of nsave\n") + raise RuntimeError isteps = nsteps/nsave if hasattr(self, 'simulation'): diff --git a/src/optimizer.py b/src/optimizer.py index 2b0c4535f..caf2bdd7d 100644 --- a/src/optimizer.py +++ b/src/optimizer.py @@ -269,7 +269,9 @@ def save_mvals_to_input(self, mvals): if in_mvals: continue print >> fout, line, if line1.startswith("read_mvals"): - if have_mvals: raise RuntimeError("Encountered more than one read_mvals section") + if have_mvals: + logger.error("Encountered more than one read_mvals section\n") + raise RuntimeError have_mvals = 1 in_mvals = 1 print >> fout, self.FF.sprint_map(mvals, precision=8) diff --git a/src/output.py b/src/output.py index 7eb352274..e42e38cd8 100644 --- a/src/output.py +++ b/src/output.py @@ -76,6 +76,7 @@ def emit(self, record): class ModLogger(Logger): def error(self, msg, *args, **kwargs): + msg = '\n'.join(['\x1b[91m%s\x1b[0m' % s for s in msg.split('\n') if len(s.strip()) > 0])+'\n' for hdlr in (self.parent.handlers if self.propagate else self.handlers): hdlr.savestream = hdlr.stream hdlr.stream = sys.stderr diff --git a/src/parser.py b/src/parser.py index a1df3c4e1..40b90a8df 100644 --- a/src/parser.py +++ b/src/parser.py @@ -252,7 +252,8 @@ if i in dct: iocc.append("gen_opt_types %s" % typ) if len(iocc) != 1: - raise RuntimeError("CODING ERROR: ForceBalance option %s occurs in more than one 
place (%s)" % (i, str(iocc))) + logger.error("CODING ERROR: ForceBalance option %s occurs in more than one place (%s)\n" % (i, str(iocc))) + raise RuntimeError ## Default general options - basically a collapsed veresion of gen_opts_types. gen_opts_defaults = {} @@ -499,7 +500,8 @@ def parse_inputs(input_file=None): elif isfloat(s[1]) and int(float(s[1])) == 1: this_opt[key] = True else: - raise RuntimeError('%s is a true/false option but you provided %s; to enable, provide ["1", "yes", "true", "on" or ]. To disable, provide ["0", "no", "false", or "off"].' % (key, s[1])) + logger.error('%s is a true/false option but you provided %s; to enable, provide ["1", "yes", "true", "on" or ]. To disable, provide ["0", "no", "false", or "off"].\n' % (key, s[1])) + raise RuntimeError elif key in opts_types['floats']: this_opt[key] = float(s[1]) elif key in opts_types['sections']: diff --git a/src/quantity.py b/src/quantity.py index 677844be7..44b137068 100644 --- a/src/quantity.py +++ b/src/quantity.py @@ -120,8 +120,9 @@ def extract(self, engines, FF, mvals, h, AGrad=True): being fitted. 
""" - raise NotImplementedError(("Extract method not implemented" - " in base class.")) + logger.error("Extract method not implemented in base class.\n") + raise NotImplementedError + # class Quantity_Density class Quantity_Density(Quantity): def __init__(self, engname, temperature, pressure, name=None): diff --git a/src/target.py b/src/target.py index 0d4343c91..cb79b6006 100644 --- a/src/target.py +++ b/src/target.py @@ -143,7 +143,8 @@ def __init__(self,options,tgt_opts,forcefield): _exec("tar xvzf targets.tar.gz") tgtdir = 'targets' else: - raise Exception('\x1b[91mThe targets directory is missing!\x1b[0m\nDid you finish setting up the target data?\nPlace the data in a directory called "targets" or "simulations"') + logger.error('\x1b[91mThe targets directory is missing!\x1b[0m\nDid you finish setting up the target data?\nPlace the data in a directory called "targets" or "simulations"\n') + raise RuntimeError self.set_option(None, None, 'tgtdir', os.path.join(tgtdir,self.name)) ## Temporary (working) directory; it is temp/(target_name) ## Used for storing temporary variables that don't change through the course of the optimization @@ -360,7 +361,8 @@ def get(self,mvals,AGrad=False,AHess=False): """ - raise NotImplementedError('The get method is not implemented in the Target base class') + logger.error('The get method is not implemented in the Target base class\n') + raise NotImplementedError def check_files(self, there): @@ -394,9 +396,11 @@ def absrd(self, inum=None): """ if Counter() > First(): - raise RuntimeError("Iteration number of this run must be %s to read data from disk (it is %s)" % (First(), Counter())) + logger.error("Iteration number of this run must be %s to read data from disk (it is %s)\n" % (First(), Counter())) + raise RuntimeError if self.rd == None: - raise RuntimeError("The directory for reading is not set") + logger.error("The directory for reading is not set\n") + raise RuntimeError # Current directory. 
Move back into here after reading data. here = os.getcwd() @@ -407,7 +411,8 @@ def absrd(self, inum=None): abs_rd = os.path.join(self.root, self.rd) # Check for directory existence. if not os.path.exists(abs_rd): - raise RuntimeError("Provided path %s does not exist" % self.rd) + logger.error("Provided path %s does not exist\n" % self.rd) + raise RuntimeError # Figure out which directory to go into. s = os.path.split(self.rd) have_data = 0 @@ -415,7 +420,8 @@ def absrd(self, inum=None): # Case 1: User has provided a specific directory to read from. there = abs_rd if not self.check_files(there): - raise RuntimeError("Provided path %s does not contain remote target output" % self.rd) + logger.error("Provided path %s does not contain remote target output\n" % self.rd) + raise RuntimeError have_data = 1 elif s[-1] == self.name: # Case 2: User has provided the target name. @@ -428,7 +434,8 @@ def absrd(self, inum=None): else: # Case 3: User has provided something else (must contain the target name in the next directory down.) 
if not os.path.exists(os.path.join(abs_rd, self.name)): - raise RuntimeError("Target directory %s does not exist in %s" % (self.name, self.rd)) + logger.error("Target directory %s does not exist in %s\n" % (self.name, self.rd)) + raise RuntimeError iterints = [int(d.replace('iter_','')) for d in os.listdir(os.path.join(abs_rd, self.name)) if os.path.isdir(os.path.join(abs_rd, self.name, d))] for i in sorted(iterints)[::-1]: there = os.path.join(abs_rd, self.name, 'iter_%04i' % i) @@ -436,7 +443,8 @@ def absrd(self, inum=None): have_data = 1 break if not have_data: - raise RuntimeError("Did not find data to read in %s" % self.rd) + logger.error("Did not find data to read in %s\n" % self.rd) + raise RuntimeError if inum != None: there = os.path.join(os.path.split(there)[0],'iter_%04i' % inum) @@ -481,7 +489,8 @@ def meta_indicate(self): os.chdir(os.path.join(self.root, self.rundir)) # If indicate.log already exists then we've made some kind of mistake. if os.path.exists('indicate.log'): - raise RuntimeError('indicate.log should not exist yet in this directory: %s' % os.getcwd()) + logger.error('indicate.log should not exist yet in this directory: %s\n' % os.getcwd()) + raise RuntimeError # Add a handler for printing to screen and file logger = getLogger("forcebalance") hdlr = forcebalance.output.RawFileHandler('indicate.log') @@ -515,7 +524,8 @@ def meta_get(self, mvals, AGrad=False, AHess=False, customdir=None): if Counter() is not None: # Not expecting more than ten thousand iterations if Counter() > 10000: - raise RuntimeError('Cannot handle more than 10000 iterations due to current directory structure. Consider revising code.') + logger.error('Cannot handle more than 10000 iterations due to current directory structure. 
Consider revising code.\n') + raise RuntimeError iterdir = "iter_%04i" % Counter() absgetdir = os.path.join(absgetdir,iterdir) if customdir is not None: @@ -638,7 +648,8 @@ def printcool_table(self, data=OrderedDict([]), headings=[], banner=None, footno # Sanity check. for val in data.values(): if (len(val)+1) != nc: - raise RuntimeError('There are %i column headings, so the values in the data dictionary must be lists of length %i (currently %i)' % (nc, nc-1, len(val))) + logger.error('There are %i column headings, so the values in the data dictionary must be lists of length %i (currently %i)\n' % (nc, nc-1, len(val))) + raise RuntimeError cwidths = [0 for i in range(nc)] # Figure out maximum column width. # First look at all of the column headings... @@ -684,7 +695,8 @@ def __init__(self,options,tgt_opts,forcefield): self.remote_indicate = "" if options['wq_port'] == 0: - raise RuntimeError("Please set the Work Queue port to use Remote Targets.") + logger.error("Please set the Work Queue port to use Remote Targets.\n") + raise RuntimeError # Remote target will read objective.p and indicate.log at the same time, # and it uses a different mechanism because it does this at every iteration (not just the 0th). diff --git a/src/thermo.py b/src/thermo.py index 351f9957f..c2f6b3a2c 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -255,7 +255,8 @@ def sanity_check(self): self.format = "right-aligned fixed width text" else: # Sanity check - it should never get here unless the parser is incorrect. 
- raise RuntimeError("Fixed-width format detected but columns are neither left-aligned nor right-aligned!") + logger.error("Fixed-width format detected but columns are neither left-aligned nor right-aligned!\n") + raise RuntimeError def parse1(fnm): @@ -328,12 +329,12 @@ def __init__(self, options, tgt_opts, forcefield): LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - def read_source(self, source): + def read_source(self, srcfnm): """Read and store source data. Parameters ---------- - source : string + srcfnm : string Read source data from this filename. Returns @@ -342,40 +343,91 @@ def read_source(self, source): """ - parser = parse1(source) - print parser.headings - printcool_dictionary(parser.metadata, title="Metadata") - # print parser.table - revised_headings = [] + source = parse1(srcfnm) + printcool_dictionary(source.metadata, title="Metadata") + # print source.table + revhead = [] obs = '' def error_left(i): - logger.error('Encountered heading %s but there is no observable to the left\n' % i) + logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % i) raise RuntimeError - for head in parser.headings: + def standardize_heading(obs, head, abbrevs, standard_abbrev): + if head in abbrevs: + if obs == '': error_left(head) + return obs + '_' + standard_abbrev, False + elif len(head.split('_')) > 1 and head.split('_')[-1] in abbrevs: + newhl = head.split('_') + newhl[-1] = standard_abbrev + return '_'.join(newhl), False + else: + return head, True + + units = defaultdict(str) + + for i, head in enumerate(source.headings): + head = head.lower() + if i == 0 and head == 'index': # Treat special case because index can also mean other things + revhead.append(head) + continue usplit = re.split(' *\(', head, maxsplit=1) + punit = '' if len(usplit) > 1: hfirst = usplit[0] punit = re.sub('\)$','',usplit[1].strip()) print "header", head, "split into", hfirst, ",", punit else: 
hfirst = head - punit = '' newh = hfirst - if head.lower() in ['w', 'wt', 'wts']: - if obs == '': error_left(head) - newh = obs + '_' + hfirst - elif head.lower() in ['s', 'sig', 'sigma']: - if obs == '': error_left(head) - newh = obs + '_' + hfirst - elif head.lower() in ['idx']: - if obs == '': error_left(head) - newh = obs + '_' + hfirst - else: + newh, o1 = standardize_heading(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'weight') + newh, o2 = standardize_heading(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sigma') + newh, o3 = standardize_heading(obs, newh, ['i', 'idx', 'index', 'indices'], 'index') + if newh in ['t', 'temp', 'temperature']: newh = 'temperature' + if newh in ['p', 'pres', 'pressure']: newh = 'pressure' + if all([o1, o2, o3]): obs = hfirst if newh != hfirst: - print "header", head, "renamed to", newh + print "header", hfirst, "renamed to", newh + revhead.append(newh) + if punit != '': + units[newh] = punit + + if len(set(revhead)) != len(revhead): + logger.error('Column headings : ' + str(revhead) + '\n') + logger.error('\x1b[91mColumn headings are not unique!\x1b[0m\n') + raise RuntimeError + + print revhead + if revhead[0] != 'index': + logger.error('\x1b[91mIndex column heading is not present\x1b[0m\n(Add an Index column on the left!)\n') + raise RuntimeError + + uqidx = [] + saveidx = '' + index = [] + # thisidx = Index that is built from the current row (may be empty) + # saveidx = Index that may have been saved from a previous row + for row in source.table: + thisidx = row[0] + if thisidx != '': + saveidx = thisidx + if saveidx in uqidx: + logger.error('Index %s is duplicated in data table\n' % i) + raise RuntimeError + uqidx.append(saveidx) + index.append(saveidx) + if saveidx == '': + logger.error('Row of data : ' + str(row) + '\n') + logger.error('\x1b[91mThis row does not have an index!\x1b[0m\n') + raise RuntimeError + + self.Data = pd.DataFrame([]) + + # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in 
revised_headings if row[i] != '']) for row in source.table]) + + # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_headings)])) + # print self.Data.__repr__ raw_input() return diff --git a/src/tinkerio.py b/src/tinkerio.py index 1dcd5f485..04111ddef 100644 --- a/src/tinkerio.py +++ b/src/tinkerio.py @@ -176,7 +176,8 @@ def write_key(fout, options, fin=None, defaults={}, verbose=False, prmfnm=None, # Make sure that the keys are lowercase, and the values are all strings. options = OrderedDict([(key.lower(), str(val) if val != None else None) for key, val in options.items()]) if 'parameters' in options and prmfnm != None: - raise RuntimeError("Please pass prmfnm or 'parameters':'filename.prm' in options but not both.") + logger.error("Please pass prmfnm or 'parameters':'filename.prm' in options but not both.\n") + raise RuntimeError elif 'parameters' in options: prmfnm = options['parameters'] @@ -249,7 +250,8 @@ def write_key(fout, options, fin=None, defaults={}, verbose=False, prmfnm=None, val = options[key] val0 = valf.strip() if key in clashes and val != val0: - raise RuntimeError("write_key tried to set %s = %s but its original value was %s = %s" % (key, val, key, val0)) + logger.error("write_key tried to set %s = %s but its original value was %s = %s\n" % (key, val, key, val0)) + raise RuntimeError # Passing None as the value causes the option to be deleted if val == None: continue @@ -283,10 +285,12 @@ def write_key(fout, options, fin=None, defaults={}, verbose=False, prmfnm=None, options["parameters"] = prmfnm elif not prmflag: if not os.path.exists('%s.prm' % os.path.splitext(fout)[0]): - raise RuntimeError('No parameter file detected, this will cause TINKER to crash') + logger.error('No parameter file detected, this will cause TINKER to crash\n') + raise RuntimeError for i in chk: if i not in haveopts: - raise RuntimeError('%s is expected to be in the .key file, but not found' % i) + logger.error('%s is 
expected to be in the .key file, but not found\n' % i) + raise RuntimeError # Finally write the key file. file_out = wopen(fout) for line in out: @@ -336,7 +340,9 @@ def readsrc(self, **kwargs): self.mol = Molecule(kwargs['coords']) else: arcfile = onefile('arc') - if not arcfile: raise RuntimeError('Cannot determine which .arc file to use') + if not arcfile: + logger.error('Cannot determine which .arc file to use\n') + raise RuntimeError self.mol = Molecule(arcfile) def calltinker(self, command, stdin=None, print_to_screen=False, print_command=False, **kwargs): @@ -365,14 +371,16 @@ def calltinker(self, command, stdin=None, print_to_screen=False, print_command=F warn_press_key("ForceBalance requires TINKER %.1f - unexpected behavior with older versions!" % vn_need) self.warn_vn = True except: - raise RuntimeError("Unable to determine TINKER version number!") + logger.error("Unable to determine TINKER version number!\n") + raise RuntimeError for line in o[-10:]: # Catch exceptions since TINKER does not have exit status. if "TINKER is Unable to Continue" in line: for l in o: logger.error("%s\n" % l) time.sleep(1) - raise RuntimeError("TINKER may have crashed! (See above output)\nThe command was: %s\nThe directory was: %s" % (' '.join(csplit), os.getcwd())) + logger.error("TINKER may have crashed! 
(See above output)\nThe command was: %s\nThe directory was: %s\n" % (' '.join(csplit), os.getcwd())) + raise RuntimeError break for line in o: if 'D+' in line: @@ -445,7 +453,8 @@ def prepare(self, pbc=False, **kwargs): tk_opts['gamma'] = None if pbc: if (not keypbc) and 'boxes' not in self.mol.Data: - raise RuntimeError("Periodic boundary conditions require either (1) a-axis to be in the .key file or (b) boxes to be in the coordinate file.") + logger.error("Periodic boundary conditions require either (1) a-axis to be in the .key file or (b) boxes to be in the coordinate file.\n") + raise RuntimeError self.pbc = pbc if pbc: tk_opts['ewald'] = '' @@ -802,7 +811,8 @@ def energy_rmsd(self, shot=0, optimize=True): if "Total Potential Energy" in line: E = float(line.split()[-2].replace('D','e')) if E == None: - raise RuntimeError("Total potential energy wasn't encountered when calling analyze!") + logger.error("Total potential energy wasn't encountered when calling analyze!\n") + raise RuntimeError if optimize and abs(E-E_) > 0.1: warn_press_key("Energy from optimize and analyze aren't the same (%.3f vs. %.3f)" % (E, E_)) return E, rmsd @@ -1018,7 +1028,8 @@ def __init__(self,options,tgt_opts,forcefield): # Error checking. for i in self.nptfiles: if not os.path.exists(os.path.join(self.root, self.tgtdir, i)): - raise RuntimeError('Please provide %s; it is needed to proceed.' % i) + logger.error('Please provide %s; it is needed to proceed.\n' % i) + raise RuntimeError # Send back the trajectory file. self.extra_output = ['liquid.dyn'] if self.save_traj > 0: diff --git a/src/vibration.py b/src/vibration.py index 132beedda..3b8127d40 100644 --- a/src/vibration.py +++ b/src/vibration.py @@ -59,7 +59,8 @@ def __init__(self,options,tgt_opts,forcefield): ## Create engine object. 
self.engine = self.engine_(target=self, **engine_args) if self.FF.rigid_water: - raise Exception('This class cannot be used with rigid water molecules.') + logger.error('This class cannot be used with rigid water molecules.\n') + raise RuntimeError def read_reference_data(self): """ Read the reference vibrational data from a file. """ @@ -93,7 +94,8 @@ def read_reference_data(self): pass else: logger.info(line + '\n') - raise Exception("This line doesn't comply with our vibration file format!") + logger.error("This line doesn't comply with our vibration file format!\n") + raise RuntimeError ln += 1 self.ref_eigvals = np.array(self.ref_eigvals) self.ref_eigvecs = np.array(self.ref_eigvecs) @@ -114,7 +116,8 @@ def vibration_driver(self): if hasattr(self, 'engine') and hasattr(self.engine, 'normal_modes'): return self.engine.normal_modes() else: - raise NotImplementedError('Normal mode calculation not supported, try using a different engine') + logger.error('Normal mode calculation not supported, try using a different engine\n') + raise NotImplementedError def process_vectors(self, vecs, verbose=False, check=False): diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt index 3b653f242..0bfd3a7dc 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt @@ -1,7 +1,7 @@ # Experimental data for liquid bromine. 
- Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w - 298.15 1.01325 3102.8 1.0 29.96 1.0 + Index Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w + 0 298.15 1.01325 3102.8 1.0 29.96 1.0 # Variables: Denominators and weights for quantities Denoms = 30 0.3 diff --git a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo_liquid_bromine/test_parse.in index 6e1a704df..28c854b1d 100644 --- a/studies/004_thermo_liquid_bromine/test_parse.in +++ b/studies/004_thermo_liquid_bromine/test_parse.in @@ -70,7 +70,7 @@ $target name LiquidBromine type Thermo_GMX weight 1.0 -expdata_txt expset.txt +source expset.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -81,7 +81,7 @@ $target name LiquidBromine_CSV type Thermo_GMX weight 1.0 -expdata_txt data.csv +source data.csv quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -92,7 +92,7 @@ $target name LiquidBromine_TAB type Thermo_GMX weight 1.0 -expdata_txt data.tab.txt +source data.tab.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -103,7 +103,7 @@ $target name Lipid_SPC type Thermo_GMX weight 1.0 -expdata_txt lipidcol1.txt +source lipidcol1.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -114,7 +114,7 @@ $target name Lipid_RIT type Thermo_GMX weight 1.0 -expdata_txt lipidcol1.txt +source lipidcol1.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -125,7 +125,7 @@ $target name Lipid_TAB type Thermo_GMX weight 1.0 -expdata_txt lipidcol1.txt +source lipidcol1.txt quantities density h_vap n_sim_chain 2 md_steps 100000 @@ -136,7 +136,7 @@ $target name Lipid_MUL type Thermo_GMX weight 1.0 -expdata_txt lipidcol2a.txt +source lipidcol2a.txt quantities density h_vap n_sim_chain 2 md_steps 100000 From 0baea9b16f0b658315fe6bf66642e29480b378fe Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 6 Apr 2014 20:48:30 -0700 Subject: [PATCH 04/25] Added file referencing in parser and build DataFrame --- src/output.py | 2 + src/thermo.py | 187 
+++++++++++++----- .../targets/Lipid_HARD/lipidcol1.txt | 67 +++++++ .../targets/Lipid_HARD/scd323.txt | 17 ++ .../targets/Lipid_HARD/scd333.txt | 17 ++ .../targets/Lipid_HARD/scd338.txt | 17 ++ .../targets/Lipid_HARD/scd353.txt | 17 ++ .../targets/Lipid_MUL/lipidcol2a.txt | 10 +- .../targets/Lipid_RIT/lipidcol1.txt | 131 ++++++------ .../targets/Lipid_SPC/lipidcol1.txt | 130 ++++++------ .../targets/Lipid_TAB/lipidcol1.txt | 130 ++++++------ .../targets/LiquidBromine_CSV/data.csv | 16 +- .../targets/LiquidBromine_TAB/data.tab.txt | 4 +- .../004_thermo_liquid_bromine/test_parse.in | 11 ++ 14 files changed, 498 insertions(+), 258 deletions(-) create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt create mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt diff --git a/src/output.py b/src/output.py index e42e38cd8..eb760eab3 100644 --- a/src/output.py +++ b/src/output.py @@ -12,6 +12,7 @@ def __init__(self, name): self.defaultHandler = RawStreamHandler(sys.stdout) super(ForceBalanceLogger, self).addHandler(self.defaultHandler) self.setLevel(INFO) + self.propagate = False def addHandler(self, hdlr): if self.defaultHandler: @@ -86,3 +87,4 @@ def error(self, msg, *args, **kwargs): # module level loggers should use the default logger object setLoggerClass(ModLogger) + diff --git a/src/thermo.py b/src/thermo.py index c2f6b3a2c..d2eb6ee0d 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -9,15 +9,18 @@ from forcebalance.target import Target from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row +from forcebalance.nifty import flat, col, row, isint from forcebalance.nifty import lp_dump, lp_load, wopen, _exec from forcebalance.nifty 
import LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary from collections import defaultdict, OrderedDict -from forcebalance.output import getLogger +import forcebalance +from forcebalance.output import * logger = getLogger(__name__) +# print logger.parent.parent.handlers[0] +# logger.parent.parent.handlers = [] class TextParser(object): """ Parse a text file. """ @@ -33,14 +36,14 @@ def is_comment_line(self): def process_header(self): """ Function for setting more attributes using the header line, if needed. """ - self.headings = [i.strip() for i in self.fields[:]] + self.heading = [i.strip() for i in self.fields[:]] def process_data(self): """ Function for setting more attributes using the current line, if needed. """ trow = [] - for ifld in range(len(self.headings)): + for ifld in range(len(self.heading)): if ifld < len(self.fields): - trow.append(self.fields[ifld]) + trow.append(self.fields[ifld].strip()) else: trow.append('') return trow @@ -49,7 +52,7 @@ def sanity_check(self): """ Extra sanity checks. """ def parse(self): - self.headings = [] # Fields in header line + self.heading = [] # Fields in header line meta = defaultdict(list) # Dictionary of metadata found_header = 0 # Whether we found the header line table = [] # List of data records @@ -240,7 +243,7 @@ def process_data(self): if set(fend).issubset(hend): for hpos in hend: if hpos in fend: - trow.append(fields[fend.index(hpos)]) + trow.append(fields[fend.index(hpos)].strip()) else: trow.append('') # Field start / end positions for the line of data @@ -289,6 +292,59 @@ def parse1(fnm): return FIX_Parser(fnm) return +def fix_suffix(obs, head, suffixs, standard_suffix): + + """ Standardize the suffix in a column heading. 
""" + + if head in suffixs: + if obs == '': + logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % head) + raise RuntimeError + return obs + '_' + standard_suffix, False + elif len(head.split('_')) > 1 and head.split('_')[-1] in suffixs: + newhl = head.split('_') + newhl[-1] = standard_suffix + return '_'.join(newhl), False + else: + return head, True + +def stand_head(head, obs): + + """ + Standardize a column heading. Does the following: + + 1) Make lowercase + 2) Split off the physical unit + 3) If a weight, uncertainty or atom index, prepend the observable name + 4) Shorten temperature and pressure + 5) Determine if this is a new observable + + Parameters: + head = Name of the heading + obs = Name of the observable (e.g. from a previously read field) + """ + + head = head.lower() + usplit = re.split(' *\(', head, maxsplit=1) + punit = '' + if len(usplit) > 1: + hfirst = usplit[0] + punit = re.sub('\)$','',usplit[1].strip()) + print "header", head, "split into", hfirst, ",", punit + else: + hfirst = head + newh = hfirst + newh, o1 = fix_suffix(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'wt') + newh, o2 = fix_suffix(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sig') + newh, o3 = fix_suffix(obs, newh, ['i', 'idx', 'index', 'indices'], 'idx') + if newh in ['t', 'temp', 'temperature']: newh = 'temp' + if newh in ['p', 'pres', 'pressure']: newh = 'pres' + if all([o1, o2, o3]): + obs = newh + if newh != hfirst: + print "header", hfirst, "renamed to", newh + return newh, punit, obs + class Thermo(Target): """ A target for fitting general experimental data sets. 
The @@ -343,54 +399,25 @@ def read_source(self, srcfnm): """ + logger.info('Parsing source file %s\n' % srcfnm) source = parse1(srcfnm) printcool_dictionary(source.metadata, title="Metadata") # print source.table revhead = [] obs = '' - def error_left(i): - logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % i) - raise RuntimeError - def standardize_heading(obs, head, abbrevs, standard_abbrev): - if head in abbrevs: - if obs == '': error_left(head) - return obs + '_' + standard_abbrev, False - elif len(head.split('_')) > 1 and head.split('_')[-1] in abbrevs: - newhl = head.split('_') - newhl[-1] = standard_abbrev - return '_'.join(newhl), False - else: - return head, True units = defaultdict(str) - for i, head in enumerate(source.headings): - head = head.lower() - if i == 0 and head == 'index': # Treat special case because index can also mean other things - revhead.append(head) + for i, head in enumerate(source.heading): + if i == 0 and head.lower() == 'index': # Treat special case because index can also mean other things + revhead.append('index') continue - usplit = re.split(' *\(', head, maxsplit=1) - punit = '' - if len(usplit) > 1: - hfirst = usplit[0] - punit = re.sub('\)$','',usplit[1].strip()) - print "header", head, "split into", hfirst, ",", punit - else: - hfirst = head - newh = hfirst - newh, o1 = standardize_heading(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'weight') - newh, o2 = standardize_heading(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sigma') - newh, o3 = standardize_heading(obs, newh, ['i', 'idx', 'index', 'indices'], 'index') - if newh in ['t', 'temp', 'temperature']: newh = 'temperature' - if newh in ['p', 'pres', 'pressure']: newh = 'pressure' - if all([o1, o2, o3]): - obs = hfirst - if newh != hfirst: - print "header", hfirst, "renamed to", newh + newh, punit, obs = stand_head(head, obs) revhead.append(newh) if punit != '': units[newh] = punit + source.heading = revhead if 
len(set(revhead)) != len(revhead): logger.error('Column headings : ' + str(revhead) + '\n') @@ -405,28 +432,94 @@ def standardize_heading(obs, head, abbrevs, standard_abbrev): uqidx = [] saveidx = '' index = [] + snum = 0 + drows = [] # thisidx = Index that is built from the current row (may be empty) # saveidx = Index that may have been saved from a previous row - for row in source.table: + # snum = Subindex number + # List of (index, heading) tuples which contain file references. + fref = OrderedDict() + for rn, row in enumerate(source.table): + this_insert = [] + # crow = row[1:] thisidx = row[0] if thisidx != '': saveidx = thisidx + snum = 0 if saveidx in uqidx: logger.error('Index %s is duplicated in data table\n' % i) raise RuntimeError uqidx.append(saveidx) - index.append(saveidx) + index.append((saveidx, snum)) if saveidx == '': logger.error('Row of data : ' + str(row) + '\n') logger.error('\x1b[91mThis row does not have an index!\x1b[0m\n') raise RuntimeError + snum += 1 + if any([':' in fld for fld in row[1:]]): + # Here we insert rows from another data table. 
+ obs2 = '' + for cid_, fld in enumerate(row[1:]): + if ':' not in fld: continue + cid = cid_ + 1 + def reffld_error(reason=''): + logger.error('Row: : ' + ' '.join(row) + '\n') + logger.error('Entry : ' + fld + '\n') + logger.error('This filename:column reference is not valid!%s' % + (' (%s)' % reason if reason != '' else '')) + raise RuntimeError + if len(fld.split(':')) != 2: + reffld_error('Wrong number of colon-separated fields') + if not isint(fld.split(':')[1]): + reffld_error('Must be an integer after the colon') + fnm = fld.split(':')[0] + fcol_ = int(fld.split(':')[1]) + fpath = os.path.join(os.path.split(srcfnm)[0], fnm) + if not os.path.exists(fpath): + reffld_error('%s does not exist' % fpath) + if (saveidx, revhead[cid]) in fref: + reffld_error('%s already contains a file reference' % (saveidx, revhead[cid])) + subfile = parse1(fpath) + fcol = fcol_ - 1 + head2, punit2, obs2 = stand_head(subfile.heading[fcol], obs2) + if revhead[cid] != head2: + reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) + fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] + + for (saveidx, head), newcol in fref.items(): + inum = 0 + for irow in range(len(source.table)): + if index[irow][0] != saveidx: continue + lrow = irow + cidx = revhead.index(head) + source.table[irow][cidx] = newcol[inum] + inum += 1 + if inum >= len(newcol): break + for inum1 in range(inum, len(newcol)): + lrow += 1 + nrow = ['' for i in range(len(revhead))] + nrow[cidx] = newcol[inum] + print "Inserting", nrow, "after row", lrow + source.table.insert(lrow, nrow) + index.insert(lrow, (saveidx, inum1)) + + # for irow in range( + # for irow1 in range(max(0, len(newcol)-inum)) - self.Data = pd.DataFrame([]) + for rn, row in enumerate(source.table): + drows.append([i if i != '' else np.nan for i in row[1:]]) + + print revhead[1:] + for rn, row in enumerate(drows): + print index[rn], row + + self.Data = pd.DataFrame(drows, 
columns=revhead[1:], index=index) + print repr(self.Data) - # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_headings if row[i] != '']) for row in source.table]) + # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) - # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_headings)])) + # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) # print self.Data.__repr__ raw_input() diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt new file mode 100644 index 000000000..ca440c7e5 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 scd323.txt:2 C34 scd323.txt:4 1 58 1 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +60C 333.15 1 0.65 1 C15 scd333.txt:2 C34 scd333.txt:4 0 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +65C 338.15 1 0.671 1 C15 scd338.txt:2 C34 scd338.txt:4 1 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +80C 353.15 1 0.719 1 C15 scd353.txt:2 C34 scd353.txt:4 1 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt new file mode 
100644 index 000000000..57c1cfa5b --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.198144 C36 0.198144 +C18 0.198128 C37 0.198128 +C19 0.198111 C38 0.198111 +C20 0.198095 C39 0.198095 +C21 0.198079 C40 0.198079 +C22 0.197799 C41 0.197537 +C23 0.198045 C42 0.198046 +C24 0.178844 C43 0.178844 +C25 0.167527 C44 0.178565 +C26 0.148851 C45 0.16751 +C27 0.134117 C46 0.148834 +C28 0.119646 C47 0.1341 +C29 0.100969 C48 0.110956 +C30 0.07546 C49 0.087549 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff 
--git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt new file mode 100644 index 000000000..31434af01 --- /dev/null +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.162535 C36 0.162535 +C18 0.162817 C37 0.162817 +C19 0.162535 C38 0.162535 +C20 0.162535 C39 0.162535 +C21 0.162817 C40 0.162817 +C22 0.151268 C41 0.151268 +C23 0.142254 C42 0.142254 +C24 0.127606 C43 0.127606 +C25 0.117465 C44 0.117465 +C26 0.101972 C45 0.117183 +C27 0.092676 C46 0.102535 +C28 0.081408 C47 0.092676 +C29 0.068732 C48 0.073239 +C30 0.051267 C49 0.056901 +C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt index 8ba35c2d9..8d97a0bcc 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt @@ -1,5 +1,5 @@ -T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 -333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 -338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 -353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +60C 333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +65C 338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +80C 353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt 
b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt index c26cf23d5..b2824acc2 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt @@ -1,68 +1,67 @@ metadata = 'Mao' - T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -333.15 1 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -338.15 1 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -353.15 1 0.719 1 C15 C34 1 58 0 10 - C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 
C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 - +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic + 50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 + 60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 + 65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 + 80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git 
a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt index f2bbb57e1..9aadee124 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt @@ -1,67 +1,67 @@ metadata = 'Mao' -T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -333.15 1 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -338.15 1 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -353.15 1 0.719 1 C15 C34 1 58 0 10 - C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 
0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 
0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt index 0ec75d7af..de4c22f46 100644 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt +++ b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt @@ -1,65 +1,65 @@ -T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 - C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - 
C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 +Index T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 
+ C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv index 847381612..354b70778 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv @@ -1,8 +1,8 @@ -"# Experimental data for liquid, bromine.",,,,, -,,,,, -Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) -298.15,3102.8,1,29.96,1,1.01325 -,,,,, -# Variables: Denominators and weights for quantities,,,,, -Denoms,=,30,0.3,, -Weights,=,1.0,1.0,, +"# Experimental data for liquid, bromine.",,,,,, +,,,,,, +Index,Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) +298.15K-1.0atm,298.15,3102.8,1,29.96,1,1.01325 +,,,,,, +# Variables: Denominators and weights for quantities,,,,,, +Denoms,=,30,0.3,,, +Weights,=,1.0,1.0,,, diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt index 333f48bbb..155adc470 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt +++ b/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt @@ -1,7 +1,7 @@ "# Experimental data for liquid, bromine." 
-Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) -298.15 3102.8 1 29.96 1 1.01325 +Index Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) +0 298.15 3102.8 1 29.96 1 1.01325 # Variables: Denominators and weights for quantities Denoms = 30 0.3 diff --git a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo_liquid_bromine/test_parse.in index 28c854b1d..858f4e6ab 100644 --- a/studies/004_thermo_liquid_bromine/test_parse.in +++ b/studies/004_thermo_liquid_bromine/test_parse.in @@ -143,3 +143,14 @@ md_steps 100000 eq_steps 50000 $end +$target +name Lipid_HARD +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +quantities density h_vap +n_sim_chain 2 +md_steps 100000 +eq_steps 50000 +$end + From e0c37850f4093b5f254155b4391ceb65bfde9049 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 6 Apr 2014 23:06:38 -0700 Subject: [PATCH 05/25] Added unit test for data file parsing (lipid) --- src/thermo.py | 7 +- .../forcefield/about.txt | 0 .../forcefield/bro.itp | 0 .../forcefield/bro.orig.itp | 0 .../optimize.in | 0 .../single.in | 0 .../targets/Lipid_HARD/lipidcol1.txt | 0 .../targets/Lipid_HARD/scd323.txt | 0 .../targets/Lipid_HARD/scd333.txt | 0 .../targets/Lipid_HARD/scd338.txt | 0 .../targets/Lipid_HARD/scd353.txt | 0 .../targets/Lipid_MUL/lipidcol2a.txt | 0 .../targets/Lipid_MUL/scd323.txt | 0 .../targets/Lipid_MUL/scd333.txt | 0 .../targets/Lipid_MUL/scd338.txt | 0 .../targets/Lipid_MUL/scd353.txt | 0 .../targets/Lipid_RIT/lipidcol1.txt | 0 .../targets/Lipid_SPC/lipidcol1.txt | 0 .../targets/Lipid_TAB/lipidcol1.txt | 65 +++++++++++++++++++ .../targets/LiquidBromine/1/sim1.gro | 0 .../targets/LiquidBromine/1/sim1.mdp | 0 .../targets/LiquidBromine/1/sim1.top | 0 .../targets/LiquidBromine/1/sim2.gro | 0 .../targets/LiquidBromine/1/sim2.mdp | 0 .../targets/LiquidBromine/1/sim2.top | 0 .../targets/LiquidBromine/about.txt | 0 .../targets/LiquidBromine/data.csv | 0 .../targets/LiquidBromine/expset.txt | 0 
.../targets/LiquidBromine_CSV/data.csv | 0 .../targets/LiquidBromine_TAB/data.tab.txt | 0 .../test_parse.in | 0 .../targets/Lipid_TAB/lipidcol1.txt | 65 ------------------- test/test_system.py | 2 +- test/test_thermo.py | 53 +++++++++++++++ 34 files changed, 123 insertions(+), 69 deletions(-) rename studies/{004_thermo_liquid_bromine => 004_thermo}/forcefield/about.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/forcefield/bro.itp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/forcefield/bro.orig.itp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/optimize.in (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/single.in (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/lipidcol1.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd323.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd333.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd338.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_HARD/scd353.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/lipidcol2a.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd323.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd333.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd338.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_MUL/scd353.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_RIT/lipidcol1.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/Lipid_SPC/lipidcol1.txt (100%) create mode 100644 studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim1.gro 
(100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim1.mdp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim1.top (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim2.gro (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim2.mdp (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/1/sim2.top (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/about.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/data.csv (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine/expset.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine_CSV/data.csv (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/targets/LiquidBromine_TAB/data.tab.txt (100%) rename studies/{004_thermo_liquid_bromine => 004_thermo}/test_parse.in (100%) delete mode 100644 studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt create mode 100644 test/test_thermo.py diff --git a/src/thermo.py b/src/thermo.py index d2eb6ee0d..2579f4059 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -498,7 +498,7 @@ def reffld_error(reason=''): for inum1 in range(inum, len(newcol)): lrow += 1 nrow = ['' for i in range(len(revhead))] - nrow[cidx] = newcol[inum] + nrow[cidx] = newcol[inum1] print "Inserting", nrow, "after row", lrow source.table.insert(lrow, nrow) index.insert(lrow, (saveidx, inum1)) @@ -521,10 +521,11 @@ def reffld_error(reason=''): # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) # print self.Data.__repr__ - raw_input() - + # raw_input() return + # return + fp = open(expdata) line = fp.readline() diff --git a/studies/004_thermo_liquid_bromine/forcefield/about.txt 
b/studies/004_thermo/forcefield/about.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/about.txt rename to studies/004_thermo/forcefield/about.txt diff --git a/studies/004_thermo_liquid_bromine/forcefield/bro.itp b/studies/004_thermo/forcefield/bro.itp similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/bro.itp rename to studies/004_thermo/forcefield/bro.itp diff --git a/studies/004_thermo_liquid_bromine/forcefield/bro.orig.itp b/studies/004_thermo/forcefield/bro.orig.itp similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/bro.orig.itp rename to studies/004_thermo/forcefield/bro.orig.itp diff --git a/studies/004_thermo_liquid_bromine/optimize.in b/studies/004_thermo/optimize.in similarity index 100% rename from studies/004_thermo_liquid_bromine/optimize.in rename to studies/004_thermo/optimize.in diff --git a/studies/004_thermo_liquid_bromine/single.in b/studies/004_thermo/single.in similarity index 100% rename from studies/004_thermo_liquid_bromine/single.in rename to studies/004_thermo/single.in diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt b/studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt b/studies/004_thermo/targets/Lipid_HARD/scd323.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd323.txt rename to studies/004_thermo/targets/Lipid_HARD/scd323.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt b/studies/004_thermo/targets/Lipid_HARD/scd333.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd333.txt rename to studies/004_thermo/targets/Lipid_HARD/scd333.txt diff --git 
a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt b/studies/004_thermo/targets/Lipid_HARD/scd338.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd338.txt rename to studies/004_thermo/targets/Lipid_HARD/scd338.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt b/studies/004_thermo/targets/Lipid_HARD/scd353.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_HARD/scd353.txt rename to studies/004_thermo/targets/Lipid_HARD/scd353.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/lipidcol2a.txt rename to studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt b/studies/004_thermo/targets/Lipid_MUL/scd323.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd323.txt rename to studies/004_thermo/targets/Lipid_MUL/scd323.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt b/studies/004_thermo/targets/Lipid_MUL/scd333.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd333.txt rename to studies/004_thermo/targets/Lipid_MUL/scd333.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt b/studies/004_thermo/targets/Lipid_MUL/scd338.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd338.txt rename to studies/004_thermo/targets/Lipid_MUL/scd338.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt b/studies/004_thermo/targets/Lipid_MUL/scd353.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_MUL/scd353.txt rename to studies/004_thermo/targets/Lipid_MUL/scd353.txt diff --git 
a/studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt b/studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_RIT/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/Lipid_SPC/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt diff --git a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt new file mode 100644 index 000000000..c67aece18 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt @@ -0,0 +1,65 @@ +Index T P Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 
0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.gro b/studies/004_thermo/targets/LiquidBromine/1/sim1.gro similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.gro rename to studies/004_thermo/targets/LiquidBromine/1/sim1.gro diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.mdp b/studies/004_thermo/targets/LiquidBromine/1/sim1.mdp similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.mdp rename to studies/004_thermo/targets/LiquidBromine/1/sim1.mdp diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.top b/studies/004_thermo/targets/LiquidBromine/1/sim1.top similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.top rename to studies/004_thermo/targets/LiquidBromine/1/sim1.top diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.gro b/studies/004_thermo/targets/LiquidBromine/1/sim2.gro similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.gro rename to studies/004_thermo/targets/LiquidBromine/1/sim2.gro diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp b/studies/004_thermo/targets/LiquidBromine/1/sim2.mdp 
similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp rename to studies/004_thermo/targets/LiquidBromine/1/sim2.mdp diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.top b/studies/004_thermo/targets/LiquidBromine/1/sim2.top similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.top rename to studies/004_thermo/targets/LiquidBromine/1/sim2.top diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/about.txt b/studies/004_thermo/targets/LiquidBromine/about.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/about.txt rename to studies/004_thermo/targets/LiquidBromine/about.txt diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/data.csv b/studies/004_thermo/targets/LiquidBromine/data.csv similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/data.csv rename to studies/004_thermo/targets/LiquidBromine/data.csv diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo/targets/LiquidBromine/expset.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt rename to studies/004_thermo/targets/LiquidBromine/expset.txt diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo/targets/LiquidBromine_CSV/data.csv similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine_CSV/data.csv rename to studies/004_thermo/targets/LiquidBromine_CSV/data.csv diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine_TAB/data.tab.txt rename to studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt diff --git 
a/studies/004_thermo_liquid_bromine/test_parse.in b/studies/004_thermo/test_parse.in similarity index 100% rename from studies/004_thermo_liquid_bromine/test_parse.in rename to studies/004_thermo/test_parse.in diff --git a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt deleted file mode 100644 index de4c22f46..000000000 --- a/studies/004_thermo_liquid_bromine/targets/Lipid_TAB/lipidcol1.txt +++ /dev/null @@ -1,65 +0,0 @@ -Index T P Punit MBAR Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -50C 323.15 1 atm FALSE 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 - C18 0.198128 C37 0.198128 - C19 0.198111 C38 0.198111 - C20 0.198095 C39 0.198095 - C21 0.198079 C40 0.198079 - C22 0.197799 C41 0.197537 - C23 0.198045 C42 0.198046 - C24 0.178844 C43 0.178844 - C25 0.167527 C44 0.178565 - C26 0.148851 C45 0.16751 - C27 0.134117 C46 0.148834 - C28 0.119646 C47 0.1341 - C29 0.100969 C48 0.110956 - C30 0.07546 C49 0.087549 - C31 C50 -60C 333.15 1 atm FALSE 0.65 1 C15 C34 0 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -65C 338.15 1 atm FALSE 0.671 1 C15 C34 1 58 0 10 - C17 0.181121 C36 0.181121 - C18 0.180807 C37 0.180807 - C19 0.181055 C38 0.181055 - C20 0.180741 C39 0.180741 - C21 0.180989 C40 0.180989 - C22 0.168579 C41 0.168579 - C23 0.169109 C42 0.169109 - C24 0.149104 C43 0.149104 - C25 0.138945 C44 0.138945 - C26 0.123439 C45 0.138629 - C27 0.112717 C46 0.123968 - C28 0.098056 C47 0.112121 - C29 0.083396 C48 0.089303 - C30 0.062266 C49 0.070424 - C31 C50 -80C 353.15 1 atm FALSE 0.719 1 C15 C34 1 58 0 10 
- C17 0.162535 C36 0.162535 - C18 0.162817 C37 0.162817 - C19 0.162535 C38 0.162535 - C20 0.162535 C39 0.162535 - C21 0.162817 C40 0.162817 - C22 0.151268 C41 0.151268 - C23 0.142254 C42 0.142254 - C24 0.127606 C43 0.127606 - C25 0.117465 C44 0.117465 - C26 0.101972 C45 0.117183 - C27 0.092676 C46 0.102535 - C28 0.081408 C47 0.092676 - C29 0.068732 C48 0.073239 - C30 0.051267 C49 0.056901 - C31 C50 diff --git a/test/test_system.py b/test/test_system.py index 927e7e783..9f87b9cd4 100644 --- a/test/test_system.py +++ b/test/test_system.py @@ -179,7 +179,7 @@ def runTest(self): class TestThermoBromineStudy(ForceBalanceTestCase): def setUp(self): super(ForceBalanceTestCase,self).setUp() - os.chdir('studies/004_thermo_liquid_bromine') + os.chdir('studies/004_thermo') def tearDown(self): os.system('rm -rf results *.bak *.tmp') diff --git a/test/test_thermo.py b/test/test_thermo.py new file mode 100644 index 000000000..9baa65e1b --- /dev/null +++ b/test/test_thermo.py @@ -0,0 +1,53 @@ +import unittest +import sys, os, re +import forcebalance +import abc +import numpy +from __init__ import ForceBalanceTestCase +from collections import defaultdict, OrderedDict + +class TestParser(ForceBalanceTestCase): + def setUp(self): + os.chdir(os.path.join(os.getcwd(), 'studies', '004_thermo')) + input_file='test_parse.in' + options, tgt_opts = forcebalance.parser.parse_inputs(input_file) + forcefield = forcebalance.forcefield.FF(options) + self.objective = forcebalance.objective.Objective(options, tgt_opts, forcefield) + + def test_lipid_parser(self): + """Test for equality amongst multiple ways to parse lipid experimental data""" + # Build a dictionary of target name : dataframes + lipid_data = OrderedDict() + for tgt in self.objective.Targets: + if 'lipid' in tgt.name.lower(): + lipid_data[tgt.name] = tgt.Data + # Double loop over different targets + for i, ikey in enumerate(lipid_data.keys()): + for j, jkey in enumerate(lipid_data.keys()): + # Check column headings and row indices 
+ self.assertTrue(all(lipid_data[ikey].columns == lipid_data[jkey].columns), msg='\nColumn headings not equal for %s and %s' % (ikey, jkey)) + self.assertTrue(all(lipid_data[ikey].index == lipid_data[jkey].index), msg='\nRow indices not equal for %s and %s' % (ikey, jkey)) + # Make dictionary representation of dataframes + dicti = lipid_data[ikey].to_dict() + dictj = lipid_data[jkey].to_dict() + # Here's where it gets complicated. + # Loop over data columns. + for column in dicti.keys(): + dseti = defaultdict(set) + dsetj = defaultdict(set) + # For each data column, the dataframe contains a + # set of data which is keyed by the system index. + # Each row is further keyed by the subindex, but + # this test assumes that the subindices are + # irrelevant (equivalent to saying the ordering of + # rows - or the relative vertical position of data + # in cells across columns - is not important. Not + # entirely true but anyway...) + for idx in dicti[column].keys(): + dseti[idx[0]].add(dicti[column][idx]) + dsetj[idx[0]].add(dictj[column][idx]) + self.assertEqual(dseti, dsetj, msg='\n%s data column not equal for targets %s and %s' % (i, ikey, jkey)) + + +if __name__ == '__main__': + unittest.main() From 3d1f598a5adf32453be35cc5dde8bea82077bc39 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 6 Apr 2014 23:32:11 -0700 Subject: [PATCH 06/25] Clean up --- src/thermo.py | 195 +++++++++++------- .../{Lipid_HARD => Lipid_MIX}/lipidcol1.txt | 0 .../targets/Lipid_MIX/lipidcol2a.txt | 5 + .../{Lipid_HARD => Lipid_MIX}/scd323.txt | 0 .../{Lipid_HARD => Lipid_MIX}/scd333.txt | 0 .../{Lipid_HARD => Lipid_MIX}/scd338.txt | 0 .../{Lipid_HARD => Lipid_MIX}/scd353.txt | 0 studies/004_thermo/test_parse.in | 2 +- 8 files changed, 125 insertions(+), 77 deletions(-) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/lipidcol1.txt (100%) create mode 100644 studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd323.txt 
(100%) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd333.txt (100%) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd338.txt (100%) rename studies/004_thermo/targets/{Lipid_HARD => Lipid_MIX}/scd353.txt (100%) diff --git a/src/thermo.py b/src/thermo.py index 2579f4059..2df195d26 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -63,11 +63,11 @@ def parse(self): self.fields = fields # Skip over empty lines or comment lines. if self.is_empty_line(): - logger.info("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) self.ln += 1 continue if self.is_comment_line(): - logger.info("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) self.ln += 1 continue # Indicates metadata mode. @@ -96,19 +96,20 @@ def parse(self): meta[mkey].append(fld) # Set field start, field end, and field content for the header. if is_header: - logger.info("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) self.process_header() elif is_meta: - logger.info("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) else: # Build the row of data to be appended to the table. # Loop through the fields in the header and inserts fields # in the data line accordingly. Ignores trailing tabs/spaces. 
- logger.info("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) + logger.debug("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) table.append(self.process_data()) self.ln += 1 self.sanity_check() - printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) + if logger.level == DEBUG: + printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) self.metadata = meta self.table = table @@ -330,7 +331,7 @@ def stand_head(head, obs): if len(usplit) > 1: hfirst = usplit[0] punit = re.sub('\)$','',usplit[1].strip()) - print "header", head, "split into", hfirst, ",", punit + logger.debug("header %s split into %s, %s" % (head, hfirst, punit)) else: hfirst = head newh = hfirst @@ -342,13 +343,14 @@ def stand_head(head, obs): if all([o1, o2, o3]): obs = newh if newh != hfirst: - print "header", hfirst, "renamed to", newh + logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs class Thermo(Target): """ - A target for fitting general experimental data sets. The - source data is described in a .txt file. + A target for fitting general experimental data sets. The source + data is described in a text file formatted according to the + Specification. 
""" def __init__(self, options, tgt_opts, forcefield): @@ -402,7 +404,6 @@ def read_source(self, srcfnm): logger.info('Parsing source file %s\n' % srcfnm) source = parse1(srcfnm) printcool_dictionary(source.metadata, title="Metadata") - # print source.table revhead = [] obs = '' @@ -424,7 +425,6 @@ def read_source(self, srcfnm): logger.error('\x1b[91mColumn headings are not unique!\x1b[0m\n') raise RuntimeError - print revhead if revhead[0] != 'index': logger.error('\x1b[91mIndex column heading is not present\x1b[0m\n(Add an Index column on the left!)\n') raise RuntimeError @@ -441,7 +441,6 @@ def read_source(self, srcfnm): fref = OrderedDict() for rn, row in enumerate(source.table): this_insert = [] - # crow = row[1:] thisidx = row[0] if thisidx != '': saveidx = thisidx @@ -457,7 +456,14 @@ def read_source(self, srcfnm): raise RuntimeError snum += 1 if any([':' in fld for fld in row[1:]]): - # Here we insert rows from another data table. + # Here we read rows from another data table. + # Other files may be referenced in the cell of a primary + # table using filename:column_number (numbered from 1). + # Rules: (1) No matter where the filename appears in the column, + # the column is inserted at the beginning of the system index. + # (2) There can only be one file per system index / column. + # (3) The column heading in the secondary file that's being + # referenced must match that of the reference in the primary file. obs2 = '' for cid_, fld in enumerate(row[1:]): if ':' not in fld: continue @@ -486,6 +492,8 @@ def reffld_error(reason=''): reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] + # Insert the file-referenced data tables appropriately into + # our main data table. 
for (saveidx, head), newcol in fref.items(): inum = 0 for irow in range(len(source.table)): @@ -499,81 +507,116 @@ def reffld_error(reason=''): lrow += 1 nrow = ['' for i in range(len(revhead))] nrow[cidx] = newcol[inum1] - print "Inserting", nrow, "after row", lrow source.table.insert(lrow, nrow) index.insert(lrow, (saveidx, inum1)) - - # for irow in range( - # for irow1 in range(max(0, len(newcol)-inum)) for rn, row in enumerate(source.table): drows.append([i if i != '' else np.nan for i in row[1:]]) - print revhead[1:] - for rn, row in enumerate(drows): - print index[rn], row - + # Turn it into a pandas DataFrame. self.Data = pd.DataFrame(drows, columns=revhead[1:], index=index) - print repr(self.Data) + return - # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) + def launch_simulation(self, index, simname): + """ - # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) - # print self.Data.__repr__ - # raw_input() - return + Launch a simulation - either locally or via the Work Queue. 
+ This function is intended to be run within the folder: + target_name/iteration_number/system_index/simulation_name/initial_condition OR + target_name/iteration_number/system_index/simulation_name + + """ + + wq = getWorkQueue() + if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): + link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) + self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] + self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) + cmdstr = '%s python npt.py %s %.3f %.3f' % (self.nptpfx, self.engname, temperature, pressure) + if wq == None: + logger.info("Running condensed phase simulation locally.\n") + logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) + _exec(cmdstr, copy_stderr=True, outfnm='npt.out') + else: + queue_up(wq, command = cmdstr+' &> npt.out', + input_files = self.nptfiles + self.scripts + ['forcebalance.p'], + output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) + + # NAMES FOR OBJECTS! - # return + # Timeseries: Time series of an instantaneous observable that is + # returned by the MD simulation. - fp = open(expdata) + # Observable: A thermodynamic property which can be compared to + # experiment and possesses methods for calculating the property + # and its derivatives. + + # State? Point? What should this be called?? 
+ + # # print revhead[1:] + # # for rn, row in enumerate(drows): + # # print index[rn], row + + # # print repr(self.Data) + + # # # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) + + + # # # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) + # # # print self.Data.__repr__ + # # # raw_input() + + # # return + + # fp = open(expdata) - line = fp.readline() - foundHeader = False - names = None - units = None - label_header = None - label_unit = None - count = 0 - metadata = {} - while line: - # Skip comments and blank lines - if line.lstrip().startswith("#") or not line.strip(): - line = fp.readline() - continue - # Metadata is denoted using - if "=" in line: # Read variable - param, value = line.split("=") - param = param.strip().lower() - metadata[param] = value - # if param == "denoms": - # for e, v in enumerate(value.split()): - # self.denoms[self.quantities[e]] = float(v) - # elif param == "weights": - # for e, v in enumerate(value.split()): - # self.weights[self.quantities[e]] = float(v) - elif foundHeader: # Read exp data - count += 1 - vals = line.split() - label = (vals[0], label_header, label_unit) - refs = np.array(vals[1:-2:2]).astype(float) - wts = np.array(vals[2:-2:2]).astype(float) - temperature = float(vals[-2]) - pressure = None if vals[-1].lower() == "none" else \ - float(vals[-1]) - dp = Point(count, label=label, refs=refs, weights=wts, - names=names, units=units, - temperature=temperature, pressure=pressure) - self.points.append(dp) - else: # Read headers - foundHeader = True - headers = zip(*[tuple(h.split("_")) for h in line.split() - if h != "w"]) - label_header = list(headers[0])[0] - label_unit = list(headers[1])[0] - names = list(headers[0][1:-2]) - units = list(headers[1][1:-2]) - line = fp.readline() + # line = fp.readline() + # foundHeader = False + # names = None + # units = None + # label_header = None + # 
label_unit = None + # count = 0 + # metadata = {} + # while line: + # # Skip comments and blank lines + # if line.lstrip().startswith("#") or not line.strip(): + # line = fp.readline() + # continue + # # Metadata is denoted using + # if "=" in line: # Read variable + # param, value = line.split("=") + # param = param.strip().lower() + # metadata[param] = value + # # if param == "denoms": + # # for e, v in enumerate(value.split()): + # # self.denoms[self.quantities[e]] = float(v) + # # elif param == "weights": + # # for e, v in enumerate(value.split()): + # # self.weights[self.quantities[e]] = float(v) + # elif foundHeader: # Read exp data + # count += 1 + # vals = line.split() + # label = (vals[0], label_header, label_unit) + # refs = np.array(vals[1:-2:2]).astype(float) + # wts = np.array(vals[2:-2:2]).astype(float) + # temperature = float(vals[-2]) + # pressure = None if vals[-1].lower() == "none" else \ + # float(vals[-1]) + # dp = Point(count, label=label, refs=refs, weights=wts, + # names=names, units=units, + # temperature=temperature, pressure=pressure) + # self.points.append(dp) + # else: # Read headers + # foundHeader = True + # headers = zip(*[tuple(h.split("_")) for h in line.split() + # if h != "w"]) + # label_header = list(headers[0])[0] + # label_unit = list(headers[1])[0] + # names = list(headers[0][1:-2]) + # units = list(headers[1][1:-2]) + # line = fp.readline() def retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated diff --git a/studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt b/studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/lipidcol1.txt rename to studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt diff --git a/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt b/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt new file mode 100644 index 000000000..8d97a0bcc --- /dev/null +++ 
b/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt @@ -0,0 +1,5 @@ +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +60C 333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +65C 338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +80C 353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo/targets/Lipid_HARD/scd323.txt b/studies/004_thermo/targets/Lipid_MIX/scd323.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd323.txt rename to studies/004_thermo/targets/Lipid_MIX/scd323.txt diff --git a/studies/004_thermo/targets/Lipid_HARD/scd333.txt b/studies/004_thermo/targets/Lipid_MIX/scd333.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd333.txt rename to studies/004_thermo/targets/Lipid_MIX/scd333.txt diff --git a/studies/004_thermo/targets/Lipid_HARD/scd338.txt b/studies/004_thermo/targets/Lipid_MIX/scd338.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd338.txt rename to studies/004_thermo/targets/Lipid_MIX/scd338.txt diff --git a/studies/004_thermo/targets/Lipid_HARD/scd353.txt b/studies/004_thermo/targets/Lipid_MIX/scd353.txt similarity index 100% rename from studies/004_thermo/targets/Lipid_HARD/scd353.txt rename to studies/004_thermo/targets/Lipid_MIX/scd353.txt diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in index 858f4e6ab..9a5503e68 100644 --- a/studies/004_thermo/test_parse.in +++ b/studies/004_thermo/test_parse.in @@ -144,7 +144,7 @@ eq_steps 50000 $end $target -name Lipid_HARD +name Lipid_MIX type Thermo_GMX weight 1.0 source lipidcol1.txt From 66f82df9fb4146ff719d387bbf786920a2a8c57a Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 7 Apr 2014 06:49:46 -0700 Subject: [PATCH 07/25] Create list of Ensembles 
and table of Observable objects --- src/{quantity.py => observable.py} | 87 +++++++++------- src/parser.py | 5 +- src/thermo.py | 154 ++++++++++++++++++++++------- studies/004_thermo/test_parse.in | 16 +-- 4 files changed, 179 insertions(+), 83 deletions(-) rename src/{quantity.py => observable.py} (81%) diff --git a/src/quantity.py b/src/observable.py similarity index 81% rename from src/quantity.py rename to src/observable.py index 44b137068..4a2e96ebb 100644 --- a/src/quantity.py +++ b/src/observable.py @@ -63,44 +63,49 @@ def energy_driver(mvals_): G[i,:] = EDG[:] return G -class Quantity(object): +class Observable(object): """ - Base class for thermodynamical quantity used for fitting. This can + Base class for thermodynamical observable used for fitting. This can be any experimental data that can be calculated as an ensemble average from a simulation. Data attributes --------------- name : string - Identifier for the quantity that is specified in `quantities` in Target + Identifier for the observable that is specified in `observables` in Target options. - engname : string - Use this engine to extract the quantity from the simulation results. - At present, only `gromacs` is supported. - temperature : float - Calculate the quantity at this temperature (in K). - pressure : float - Calculate the quantity at this pressure (in bar). - + sreq : list of (strings or lists) + The names of simulation types that are needed to calculate + this observable. If a string is added to the list, then that + simulation is *required* to calculate the observable. If a + list is added, then any simulation within that type is allowed, + but the first member of the list is used by default. + treq : list of strings + The names of timeseries from each simulation that are needed to + calculate this observable. + dreq : list of strings + The names of timeseries from each simulation - in addition to + "treq" - that are needed to *differentiate* this observable. 
+ (Usually energy derivatives) """ - def __init__(self, engname, temperature, pressure, name=None): + def __init__(self, source, name=None): self.name = name if name is not None else "empty" - self.engname = engname - self.temperature = temperature - self.pressure = pressure + self.sreq = [] + self.treq = [] + self.dreq = ['energy_derivatives'] def __str__(self): - return "quantity is " + self.name.capitalize() + "." + return "observable is " + self.name.capitalize() + "." def extract(self, engines, FF, mvals, h, AGrad=True): - """Calculate and extract the quantity from MD results. How this is done - depends on the quantity and the engine so this must be + """Calculate and extract the observable from MD results. How this is done + depends on the observable and the engine so this must be implemented in the subclass. Parameters ---------- engines : list - A list of Engine objects that are requred to calculate the quantity. + A list of Engine objects that are requred to calculate the observable. FF : FF Force field object. mvals : list @@ -114,23 +119,29 @@ def extract(self, engines, FF, mvals, h, AGrad=True): ------- result : (float, float, np.array) The returned tuple is (Q, Qerr, Qgrad), where Q is the calculated - quantity, Qerr is the calculated standard deviation of the quantity, + observable, Qerr is the calculated standard deviation of the observable, and Qgrad is a M-array with the calculated gradients for the - quantity, with M being the number of force field parameters that are + observable, with M being the number of force field parameters that are being fitted. """ logger.error("Extract method not implemented in base class.\n") raise NotImplementedError -# class Quantity_Density -class Quantity_Density(Quantity): - def __init__(self, engname, temperature, pressure, name=None): +# class Observable_Density +class Observable_Density(Observable): + def __init__(self, source, name=None): """ Density. 
""" - super(Quantity_Density, self).__init__(engname, temperature, pressure, name) + super(Observable_Density, self).__init__(source, name) self.name = name if name is not None else "density" + # Calculating the density requires either a liquid or solid simulation. + self.sreq = [['liquid', 'solid']] + + # Requires timeseries of densities from the simulation. + self.treq = ['density'] + def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# # Physical constants and local variables. # @@ -182,9 +193,9 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): color=4, bold=True) G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) - #=======================================# - # Quantity properties and derivatives. # - #=======================================# + #=========================================# + # Observable properties and derivatives. # + #=========================================# # Average and error. Rho_avg, Rho_err = mean_stderr(Density) # Analytic first derivative. @@ -193,14 +204,20 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Rho_avg, Rho_err, Rho_grad -# class Quantity_H_vap -class Quantity_H_vap(Quantity): - def __init__(self, engname, temperature, pressure, name=None): +# class Observable_H_vap +class Observable_H_vap(Observable): + def __init__(self, source, name=None): """ Enthalpy of vaporization. """ - super(Quantity_H_vap, self).__init__(engname, temperature, pressure, name) + super(Observable_H_vap, self).__init__(source, name) self.name = name if name is not None else "H_vap" + # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. + self.sreq = ['liquid', 'gas'] + + # Requires timeseries of energies and volumes from the simulation. 
+ self.treq = ['energy', 'volume'] + def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# # Physical constants and local variables. # @@ -274,9 +291,9 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) Gm = energy_derivatives(engines[1], FF, mvals, h, pgrad, len(mEnergy), AGrad) - #=======================================# - # Quantity properties and derivatives. # - #=======================================# + #=========================================# + # Observable properties and derivatives. # + #=========================================# # Average and error. E_avg, E_err = mean_stderr(Energy) Em_avg, Em_err = mean_stderr(mEnergy) diff --git a/src/parser.py b/src/parser.py index 40b90a8df..340b45723 100644 --- a/src/parser.py +++ b/src/parser.py @@ -157,7 +157,7 @@ "engine" : (None, 180, 'The external code used to execute the simulations (GMX, TINKER, AMBER, OpenMM)', 'All targets (important)', '') }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), - "quantities" : ([], 100, 'List of quantities to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "observables" : ([], 100, 'List of observables to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -174,7 +174,7 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 
0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate quantities.', 'Thermodynamic property targets', 'thermo'), + "n_sim_chain" : (1, 0, 'Number of simulations required to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), @@ -285,6 +285,7 @@ "lipid_prod_steps" : "lipid_md_steps", "lipid_equ_steps" : "lipid_eq_steps", "expdata_txt" : "source", + "quantities" : "observables", } ## Listing of sections in the input file. diff --git a/src/thermo.py b/src/thermo.py index 2df195d26..66c8115d0 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -7,6 +7,7 @@ import itertools import cStringIO +from forcebalance.observable import * from forcebalance.target import Target from forcebalance.finite_difference import in_fd from forcebalance.nifty import flat, col, row, isint @@ -346,6 +347,17 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs +def determine_needed_simulations(observables): + + """ Given a list of Observable objects, determine the list of + simulations that are needed to calculate all of them. """ + + sreqs = OrderedDict() + for obs in observables: + sreqs[obs.name] = obs.sreq[:] + print sreqs + + class Thermo(Target): """ A target for fitting general experimental data sets. 
The source @@ -360,8 +372,8 @@ def __init__(self, options, tgt_opts, forcefield): ## Parameters # Source data (experimental data, model parameters and weights) self.set_option(tgt_opts, "source", forceprint=True) - # Quantities to calculate - self.set_option(tgt_opts, "quantities", forceprint=True) + # Observables to calculate + self.set_option(tgt_opts, "observables", "observable_names", forceprint=True) # Length of simulation chain self.set_option(tgt_opts, "n_sim_chain", forceprint=True) # Number of time steps in the equilibration run @@ -372,21 +384,30 @@ def __init__(self, options, tgt_opts, forcefield): ## Variables # Prefix names for simulation data self.simpfx = "sim" - # Data points for quantities + # Data points for observables self.points = [] - # Denominators for quantities + # Denominators for observables self.denoms = {} - # Weights for quantities + # Weights for observables self.weights = {} - ## Read source data and initialize points + ## A mapping that takes us from observable names to Observable objects. + self.Observable_Map = {'density' : Observable_Density, + 'rho' : Observable_Density, + 'hvap' : Observable_H_vap, + 'h_vap' : Observable_H_vap} + + ## Read source data and initialize points; creates self.Data, self.Ensembles and self.Observables objects. self.read_source(os.path.join(self.root, self.tgtdir, self.source)) ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - + + ## Set up simulations + self.prepare_simulations() + def read_source(self, srcfnm): """Read and store source data. 
@@ -406,7 +427,7 @@ def read_source(self, srcfnm): printcool_dictionary(source.metadata, title="Metadata") revhead = [] obs = '' - + obsnames = [] units = defaultdict(str) @@ -415,6 +436,7 @@ def read_source(self, srcfnm): revhead.append('index') continue newh, punit, obs = stand_head(head, obs) + if obs not in obsnames + ['temp', 'pres', 'n_ic']: obsnames.append(obs) revhead.append(newh) if punit != '': units[newh] = punit @@ -514,7 +536,63 @@ def reffld_error(reason=''): drows.append([i if i != '' else np.nan for i in row[1:]]) # Turn it into a pandas DataFrame. - self.Data = pd.DataFrame(drows, columns=revhead[1:], index=index) + self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['ensemble', 'subindex'])) + + # A list of ensembles (i.e. top-level indices) which correspond + # to sets of simulations that we'll be running. + self.Ensembles = [] + for idx in self.Data.index: + if idx[0] not in self.Ensembles: + self.Ensembles.append(idx[0]) + + # A list of Observable objects (i.e. column headings) which + # contain methods for calculating observables that we need. + # Think about: + # (1) How much variability is allowed across Ensembles? + # For instance, different S_cd is permissible. 
+ self.Observables = OrderedDict() + for obsname in [stand_head(i, '')[2] for i in self.observable_names]: + if obsname in self.Observables: + logger.error('%s was already specified as an observable' % (obsname)) + self.Observables[obsname] = OrderedDict() + for ie, ensemble in enumerate(self.Ensembles): + if obsname in self.Observable_Map: + newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) + logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) + self.Observables[obsname][ensemble] = newobs + else: + logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) + self.Observables[obsname][ensemble] = Observable(name=obsname, source=self.Data.ix[ensemble]) + + # for ensemble in self.Ensembles: + # self.Observables[ensemble] = [] + # for obsname in obsnames: + # for ensemble, ie in enumerate(self.Ensembles): + # if obsname in self.Observable_Map: + # newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) + # if newobs.name in [obs.name for obs in self.Observables[ensemble]]: + # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) + # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) + # self.Observables[ensemble].append(newobs) + # else: + # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) + # self.Observables[ensemble].append(Observable(name=obsname, source=self.Data.ix[ensemble])) + return + + def prepare_simulations(self): + + """ + + Prepare simulations to be launched. Set initial conditions + and create directories. This function is intended to be run + at the start of each optimization cycle, so that initial + conditions may be easily set. + + """ + + # The list of simulations that we'll be running. 
+ self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + return def launch_simulation(self, index, simname): @@ -591,10 +669,10 @@ def launch_simulation(self, index, simname): # metadata[param] = value # # if param == "denoms": # # for e, v in enumerate(value.split()): - # # self.denoms[self.quantities[e]] = float(v) + # # self.denoms[self.observables[e]] = float(v) # # elif param == "weights": # # for e, v in enumerate(value.split()): - # # self.weights[self.quantities[e]] = float(v) + # # self.weights[self.observables[e]] = float(v) # elif foundHeader: # Read exp data # count += 1 # vals = line.split() @@ -620,12 +698,12 @@ def launch_simulation(self, index, simname): def retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated - quantities in the Point object dp. + observables in the Point object dp. Parameters ---------- dp : Point - Store the calculated quantities in this point. + Store the calculated observables in this point. Returns ------- @@ -647,9 +725,9 @@ def retrieve(self, dp): msg = 'The file ' + abspath + ' does not exist so we cannot read it.\n' logger.warning(msg) - dp.data["values"] = np.zeros((len(self.quantities))) - dp.data["errors"] = np.zeros((len(self.quantities))) - dp.data["grads"] = np.zeros((len(self.quantities), self.FF.np)) + dp.data["values"] = np.zeros((len(self.observables))) + dp.data["errors"] = np.zeros((len(self.observables))) + dp.data["grads"] = np.zeros((len(self.observables), self.FF.np)) def submit_jobs(self, mvals, AGrad=True, AHess=True): """This routine is called by Objective.stage() and will run before "get". @@ -696,7 +774,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # Run the simulation chain for point. 
cmdstr = ("%s python md_chain.py " % self.mdpfx + - " ".join(self.quantities) + " " + + " ".join(self.observables) + " " + "--engine %s " % self.engname + "--length %d " % self.n_sim_chain + "--name %s " % self.simpfx + @@ -735,7 +813,7 @@ def print_item(key, physunit): (self.Xp[key], self.Wp[key], self.Xp[key]*self.Wp[key]))) - for i, q in enumerate(self.quantities): + for i, q in enumerate(self.observables): print_item(q, self.points[0].ref["units"][i]) PrintDict['Total'] = "% 10s % 8s % 14.5e" % ("","", self.Objective) @@ -745,14 +823,14 @@ def print_item(key, physunit): printcool_dictionary(PrintDict, color=4, title=Title, keywidth=31) return - def objective_term(self, quantity): + def objective_term(self, observable): """Calculates the contribution to the objective function (the term) for a - given quantity. + given observable. Parameters ---------- - quantity : string - Calculate the objective term for this quantity. + observable : string + Calculate the objective term for this observable. 
Returns ------- @@ -767,18 +845,18 @@ def objective_term(self, quantity): Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) - # Grab ref data for quantity - qid = self.quantities.index(quantity) + # Grab ref data for observable + qid = self.observables.index(observable) Exp = np.array([pt.ref["refs"][qid] for pt in self.points]) Weights = np.array([pt.ref["weights"][qid] for pt in self.points]) - Denom = self.denoms[quantity] + Denom = self.denoms[observable] # Renormalize weights Weights /= np.sum(Weights) logger.info("Renormalized weights to " + str(np.sum(Weights)) + "\n") - logger.info(("Physical quantity '%s' uses denominator = %g %s\n" % - (quantity.capitalize(), Denom, - self.points[0].ref["units"][self.quantities.index(quantity)]))) + logger.info(("Physical observable '%s' uses denominator = %g %s\n" % + (observable.capitalize(), Denom, + self.points[0].ref["units"][self.observables.index(observable)]))) # Grab calculated values values = np.array([pt.data["values"][qid] for pt in self.points]) @@ -814,7 +892,7 @@ def objective_term(self, quantity): GradMapPrint.append([' %8.2f %8.1f' % (temp, press)] + ["% 9.3e" % i for i in grads[pt.idnr-1]]) - o = wopen('gradient_%s.dat' % quantity) + o = wopen('gradient_%s.dat' % observable) for line in GradMapPrint: print >> o, ' '.join(line) o.close() @@ -831,7 +909,7 @@ def objective_term(self, quantity): def get(self, mvals, AGrad=True, AHess=True): """Return the contribution to the total objective function. This is a - weighted average of the calculated quantities. + weighted average of the calculated observables. 
Parameters ---------- @@ -863,16 +941,16 @@ def get(self, mvals, AGrad=True, AHess=True): obj = OrderedDict() reweighted = [] - for q in self.quantities: + for q in self.observables: # Returns dict with keys "X"=objective term value, "G"=the # gradient, "H"=the hessian, and "info"=printed info about points obj[q] = self.objective_term(q) - # Apply weights for quantities (normalized) + # Apply weights for observables (normalized) if obj[q]["X"] == 0: self.weights[q] = 0.0 - # Store weights sorted in the order of self.quantities + # Store weights sorted in the order of self.observables reweighted.append(self.weights[q]) # Normalize weights @@ -880,16 +958,16 @@ def get(self, mvals, AGrad=True, AHess=True): wtot = np.sum(reweighted) reweighted = reweighted/wtot if wtot > 0 else reweighted - # Picks out the "X", "G" and "H" keys for the quantities sorted in the - # order of self.quantities. Xs is N-array, Gs is NxM-array and Hs is - # NxMxM-array, where N is number of quantities and M is number of + # Picks out the "X", "G" and "H" keys for the observables sorted in the + # order of self.observables. Xs is N-array, Gs is NxM-array and Hs is + # NxMxM-array, where N is number of observables and M is number of # parameters. Xs = np.array([dic["X"] for dic in obj.values()]) Gs = np.array([dic["G"] for dic in obj.values()]) Hs = np.array([dic["H"] for dic in obj.values()]) # Target contribution is (normalized) weighted averages of the - # individual quantity terms. + # individual observable terms. 
Objective = np.average(Xs, weights=(None if np.all(reweighted == 0) else \ reweighted), axis=0) if AGrad: @@ -902,7 +980,7 @@ def get(self, mvals, AGrad=True, AHess=True): if not in_fd(): # Store results to show with indicator() function self.Xp = {q : dic["X"] for (q, dic) in obj.items()} - self.Wp = {q : reweighted[self.quantities.index(q)] + self.Wp = {q : reweighted[self.observables.index(q)] for (q, dic) in obj.items()} self.Pp = {q : dic["info"] for (q, dic) in obj.items()} diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in index 9a5503e68..4eb82ab64 100644 --- a/studies/004_thermo/test_parse.in +++ b/studies/004_thermo/test_parse.in @@ -71,7 +71,7 @@ name LiquidBromine type Thermo_GMX weight 1.0 source expset.txt -quantities density h_vap +observables density h_vap n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -82,7 +82,7 @@ name LiquidBromine_CSV type Thermo_GMX weight 1.0 source data.csv -quantities density h_vap +observables density h_vap n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -93,7 +93,7 @@ name LiquidBromine_TAB type Thermo_GMX weight 1.0 source data.tab.txt -quantities density h_vap +observables density h_vap n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -104,7 +104,7 @@ name Lipid_SPC type Thermo_GMX weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -115,7 +115,7 @@ name Lipid_RIT type Thermo_GMX weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -126,7 +126,7 @@ name Lipid_TAB type Thermo_GMX weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -137,7 +137,7 @@ name Lipid_MUL type Thermo_GMX weight 1.0 source lipidcol2a.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 @@ -148,7 +148,7 @@ name Lipid_MIX type Thermo_GMX 
weight 1.0 source lipidcol1.txt -quantities density h_vap +observables al scd kappa n_sim_chain 2 md_steps 100000 eq_steps 50000 From e3676f2887e79ca1ed0d2e5d68720a415c8c9afc Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 7 Apr 2014 07:08:39 -0700 Subject: [PATCH 08/25] Map observable names to required simulations --- src/thermo.py | 66 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/src/thermo.py b/src/thermo.py index 66c8115d0..1d752e3de 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,6 +1,7 @@ import os import re import csv +import copy import errno import numpy as np import pandas as pd @@ -347,17 +348,6 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs -def determine_needed_simulations(observables): - - """ Given a list of Observable objects, determine the list of - simulations that are needed to calculate all of them. """ - - sreqs = OrderedDict() - for obs in observables: - sreqs[obs.name] = obs.sreq[:] - print sreqs - - class Thermo(Target): """ A target for fitting general experimental data sets. The source @@ -579,6 +569,57 @@ def reffld_error(reason=''): # self.Observables[ensemble].append(Observable(name=obsname, source=self.Data.ix[ensemble])) return + def determine_simulations(self): + + """ + Determine which simulations need to be run. The same + simulations are run for each ensemble in the data set. + """ + + # Determine which simulations are needed. + sreqs = OrderedDict() + for obsname in self.Observables: + sreqs[obsname] = self.Observables[obsname][self.Ensembles[0]].sreq + + def narrow(): + # Get the names of simulations that are REQUIRED to calculate the observables. + toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + # Whoa, this is a deeply nested loop. What does it do? + # First loop over the elements in "sreqs" for each observable name. 
+ # If the element is a string, then it's a required simulation name (top level). + # If the element is a list, then it's a list of valid simulation names + # and we need to narrow the list down. + # For the ones that are lists (and have any intersection with the top level), + # delete the ones that don't intersect. + sreq0 = copy.deepcopy(sreqs) + for obsname in sreqs: + for sims in sreqs[obsname]: + if type(sims) == list: + if len(sims) == 1: + sreqs[obsname] = [sims[0]] + elif any([i in sims for i in toplevel]): + for j in sims: + if j not in toplevel: sims.remove(j) + return sreqs != sreq0 + + print sreqs + while narrow(): + print sreqs + # For the leftover observables where there is still some ambiguity, + # we attempt + # To do: Figure this out from existing initial conditions maybe + for obsname in sreqs: + for sims in sreqs[obsname]: + if type(sims) == list: + for sim in sims: + if has_ic(sim): + sreqs[obsname] = [sim] + + + self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + + return + def prepare_simulations(self): """ @@ -589,7 +630,8 @@ def prepare_simulations(self): conditions may be easily set. """ - + # print narrow() + # The list of simulations that we'll be running. self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) From 2b6df36aa274cdd6f36e9c8151255091df5d402c Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Tue, 8 Apr 2014 01:33:05 -0700 Subject: [PATCH 09/25] Start modifying framework to require user input simulations. 
--- src/observable.py | 24 +- src/thermo.py | 319 +++++++++++++++--- studies/004_thermo/single.in | 2 +- .../LiquidBromine/1/{sim2.gro => gas.gro} | 0 .../LiquidBromine/1/{sim2.mdp => gas.mdp} | 0 .../LiquidBromine/1/{sim2.top => gas.top} | 0 .../LiquidBromine/1/{sim1.gro => liquid.gro} | 0 .../LiquidBromine/1/{sim1.mdp => liquid.mdp} | 0 .../LiquidBromine/1/{sim1.top => liquid.top} | 0 .../targets/LiquidBromine/expset.txt | 2 +- 10 files changed, 289 insertions(+), 58 deletions(-) rename studies/004_thermo/targets/LiquidBromine/1/{sim2.gro => gas.gro} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim2.mdp => gas.mdp} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim2.top => gas.top} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim1.gro => liquid.gro} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim1.mdp => liquid.mdp} (100%) rename studies/004_thermo/targets/LiquidBromine/1/{sim1.top => liquid.top} (100%) diff --git a/src/observable.py b/src/observable.py index 4a2e96ebb..a1139105a 100644 --- a/src/observable.py +++ b/src/observable.py @@ -213,10 +213,10 @@ def __init__(self, source, name=None): self.name = name if name is not None else "H_vap" # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. - self.sreq = ['liquid', 'gas'] + self.sreq = [['liquid'], ['gas']] # Requires timeseries of energies and volumes from the simulation. - self.treq = ['energy', 'volume'] + self.treq = [['energy', 'volume'], ['energy']] def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -315,3 +315,23 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Hvap_avg, Hvap_err, Hvap_grad +# class Observable_Kappa +class Observable_Kappa(Observable): + def __init__(self, source, name=None): + """ Compressibility (applies to liquid and lipid bilayer.) 
""" + super(Observable_H_vap, self).__init__(source, name) + + self.name = name if name is not None else "H_vap" + + # List of dictionaries of simulations, keyed to timeseries to extract from the simulation. + # Each dictionary represents a simulation in a sequence, but the observable isn't mapped to a unique simulation each time. + # Because of this, we determine which simulation to launch (in the sequence) based on the available initial coordinates (or explicit user input). + # Depending on which simulation is executed, we require different timeseries from the simulation, and different formulas. + # But another way is to just define two observables ... need to think about it. + + self.sreq = [{'liquid':['volume'], 'bilayer':['al']}, + ] + + + # Requires timeseries of energies and volumes from the simulation. + self.treq = [['energy', 'volume'], ['energy']] diff --git a/src/thermo.py b/src/thermo.py index 1d752e3de..93e50e866 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -380,14 +380,19 @@ def __init__(self, options, tgt_opts, forcefield): self.denoms = {} # Weights for observables self.weights = {} + # Suffixes for coordinate files + self.crdsfx = {'gromacs':['.gro', '.pdb'], + 'tinker':['.xyz', '.arc'], + 'openmm':['.pdb']}[self.engname.lower()] ## A mapping that takes us from observable names to Observable objects. self.Observable_Map = {'density' : Observable_Density, 'rho' : Observable_Density, 'hvap' : Observable_H_vap, 'h_vap' : Observable_H_vap} + - ## Read source data and initialize points; creates self.Data, self.Ensembles and self.Observables objects. + ## Read source data and initialize points; creates self.Data, self.Indices and self.Observables objects. 
self.read_source(os.path.join(self.root, self.tgtdir, self.source)) ## Copy run scripts from ForceBalance installation directory @@ -396,7 +401,7 @@ def __init__(self, options, tgt_opts, forcefield): os.path.join(self.root, self.tempdir, f)) ## Set up simulations - self.prepare_simulations() + #self.determine_simulations() def read_source(self, srcfnm): """Read and store source data. @@ -526,97 +531,269 @@ def reffld_error(reason=''): drows.append([i if i != '' else np.nan for i in row[1:]]) # Turn it into a pandas DataFrame. - self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['ensemble', 'subindex'])) + self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) - # A list of ensembles (i.e. top-level indices) which correspond + # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. - self.Ensembles = [] + self.Indices = [] for idx in self.Data.index: - if idx[0] not in self.Ensembles: - self.Ensembles.append(idx[0]) + if idx[0] not in self.Indices: + self.Indices.append(idx[0]) # A list of Observable objects (i.e. column headings) which # contain methods for calculating observables that we need. # Think about: - # (1) How much variability is allowed across Ensembles? + # (1) How much variability is allowed across Indices? # For instance, different S_cd is permissible. 
self.Observables = OrderedDict() for obsname in [stand_head(i, '')[2] for i in self.observable_names]: if obsname in self.Observables: logger.error('%s was already specified as an observable' % (obsname)) self.Observables[obsname] = OrderedDict() - for ie, ensemble in enumerate(self.Ensembles): + for ie, index in enumerate(self.Indices): if obsname in self.Observable_Map: - newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) + newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - self.Observables[obsname][ensemble] = newobs + self.Observables[obsname][index] = newobs else: logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - self.Observables[obsname][ensemble] = Observable(name=obsname, source=self.Data.ix[ensemble]) - - # for ensemble in self.Ensembles: - # self.Observables[ensemble] = [] + self.Observables[obsname][index] = Observable(name=obsname, source=self.Data.ix[index]) + + print self.Indices + print self.Observables + print repr(self.Data) + raw_input() + # for index in self.Indices: + # self.Observables[index] = [] # for obsname in obsnames: - # for ensemble, ie in enumerate(self.Ensembles): + # for index, ie in enumerate(self.Indices): # if obsname in self.Observable_Map: - # newobs = self.Observable_Map[obsname](source=self.Data.ix[ensemble]) - # if newobs.name in [obs.name for obs in self.Observables[ensemble]]: + # newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) + # if newobs.name in [obs.name for obs in self.Observables[index]]: # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - # self.Observables[ensemble].append(newobs) + # self.Observables[index].append(newobs) # 
else: # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - # self.Observables[ensemble].append(Observable(name=obsname, source=self.Data.ix[ensemble])) + # self.Observables[index].append(Observable(name=obsname, source=self.Data.ix[index])) return + def find_ic(self, index, stype, icn): + """ + Search for a suitable initial condition file. + + Initial condition files will be searched for in the following priority: + targets/target_name/index/stype/ICs/stype_#.xyz + targets/target_name/index/stype/ICs/stype#.xyz + targets/target_name/index/stype/ICs/#.xyz + targets/target_name/index/stype/ICs/stype.xyz + targets/target_name/index/stype/ICs/coords.xyz + targets/target_name/index/stype/stype.xyz + targets/target_name/index/stype/coords.xyz + targets/target_name/index/stype.xyz + targets/target_name/stype.xyz + """ + found = '' + basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', stype), False), + (os.path.join(index, stype, 'ICs', 'coords'), False), + (os.path.join(index, stype, stype), False), + (os.path.join(index, stype, 'coords'), False), + (os.path.join(index, stype), False), + (os.path.join(stype), False)] + paths = [] + for fnm, numbered in basefnms: + for crdsfx in self.crdsfx: + fpath = os.path.join(self.tgtdir, fnm+crdsfx) + paths.append(fpath) + if os.path.exists(fpath): + if found != '': + logger.info('Target %s Index %s Simulation %s : ' + '%s overrides %s\n' % (self.name, index, stype, fpath)) + else: + if not numbered: + M = Molecule(fpath) + if len(M) <= icn: + logger.error("Target %s Index %s Simulation %s : " + "initial coordinate file %s doesn't have enough structures\n" % + (self.name, index, stype, fpath)) + raise RuntimeError + logger.info('Target %s Index %s Simulation %s : ' + 'found initial 
coordinate file %s\n' % (self.name, index, stype, fpath)) + found = fpath + # if found == '': + # logger.error('Target %s Index %s Simulation %s : ' + # 'could not find initial coordinate file\n' + # 'Please provide one of the following:\n%s' + # % (self.name, index, stype, '\n'.join(paths))) + # raise RuntimeError + return found, 0 if numbered else icn + def determine_simulations(self): """ Determine which simulations need to be run. The same - simulations are run for each ensemble in the data set. + simulations are run for each index in the data set. + + Note that there may be a different number of initial + conditions (i.e. parallel runs) for different indices. """ # Determine which simulations are needed. sreqs = OrderedDict() for obsname in self.Observables: - sreqs[obsname] = self.Observables[obsname][self.Ensembles[0]].sreq - - def narrow(): - # Get the names of simulations that are REQUIRED to calculate the observables. - toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # Whoa, this is a deeply nested loop. What does it do? - # First loop over the elements in "sreqs" for each observable name. - # If the element is a string, then it's a required simulation name (top level). - # If the element is a list, then it's a list of valid simulation names - # and we need to narrow the list down. - # For the ones that are lists (and have any intersection with the top level), - # delete the ones that don't intersect. - sreq0 = copy.deepcopy(sreqs) + sreqs[obsname] = self.Observables[obsname][self.Indices[0]].sreq + + # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + + self.Simulations = OrderedDict([(i, OrderedDict()) for i in self.Indices]) + tsnames = [] + for obsname in self.Observables: + treqs = self.Observables[obsname][self.Indices[0]].treq + for treq in treqs: + if treq not in tsnames: + tsnames.append(treq) + + for index in self.Indices: + # Loop over observable names. 
Here we determine whether + # the initial coordinates are missing (bad), unique (good) or ambiguous (bad). + if 'n_ic' in self.Data.ix[index]: + n_ic = self.Data.ix[index]['n_ic'] + else: + n_ic = 1 for obsname in sreqs: - for sims in sreqs[obsname]: - if type(sims) == list: - if len(sims) == 1: - sreqs[obsname] = [sims[0]] - elif any([i in sims for i in toplevel]): - for j in sims: - if j not in toplevel: sims.remove(j) - return sreqs != sreq0 - - print sreqs - while narrow(): - print sreqs + for stypes in sreqs[obsname]: + if isinstance(stypes, str): + stypes = [stypes] + for icn in range(n_ic): + icfiles = [] + svalid = [] + for stype in stypes: + fpath, iframe = self.find_ic(index, stype, icn) + if fpath != '': + icfiles.append(fpath) + svalid.append(stype) + if len(icfiles) == 0: + logger.error('Target %s Index %s Simulation %s : ' + 'could not find initial coordinate file\n' + % (self.name, index, stype)) + raise RuntimeError + elif len(icfiles) > 1: + logger.error('Target %s Index %s Simulation %s : ' + 'ambiguous initial coordinate files (%s)' + % (self.name, index, stype, ' '.join(icfiles))) + self.Simulations[index][svalid[0]] = Simulation(index, svalid[0], icfiles[0], iframe, tsnames) + + print self.Simulations + print tsnames + # raw_input() + # if isinstance(sreqs[obsname], str): + # stypes = [sreqs[obsname]] + # for stype in stypes: + # print index, stype + + # for stype in toplevel: + # for index in self.Indices: + # def find_ic(icn): + # found = '' + # # Initial condition files will be searched for in the following priority: + # # targets/target_name/index/stype/ICs/stype_#.xyz + # # targets/target_name/index/stype/ICs/stype#.xyz + # # targets/target_name/index/stype/ICs/#.xyz + # # targets/target_name/index/stype/ICs/stype.xyz + # # targets/target_name/index/stype/ICs/coords.xyz + # # targets/target_name/index/stype/stype.xyz + # # targets/target_name/index/stype/coords.xyz + # # targets/target_name/index/stype.xyz + # # 
targets/target_name/stype.xyz + # basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), + # (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), + # (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), + # (os.path.join(index, stype, 'ICs', stype), False), + # (os.path.join(index, stype, 'ICs', 'coords'), False), + # (os.path.join(index, stype, stype), False), + # (os.path.join(index, stype, 'coords'), False), + # (os.path.join(index, stype), False), + # (os.path.join(stype), False)] + # paths = [] + # for fnm, numbered in basefnms: + # for crdsfx in self.crdsfx: + # fpath = os.path.join(self.tgtdir, fnm+crdsfx) + # paths.append(fpath) + # if os.path.exists(fpath): + # if found != '': + # logger.info('Target %s Index %s Simulation %s : ' + # '%s overrides %s\n' % (self.name, index, stype, fpath)) + # else: + # if not numbered: + # M = Molecule(fpath) + # if len(M) <= icn: + # logger.error("Target %s Index %s Simulation %s : " + # "initial coordinate file %s doesn't have enough structures\n" % + # (self.name, index, stype, fpath)) + # raise RuntimeError + # logger.info('Target %s Index %s Simulation %s : ' + # 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) + # found = fpath + # if found == '': + # logger.error('Target %s Index %s Simulation %s : ' + # 'could not find initial coordinate file\n' + # 'Please provide one of the following:\n%s' + # % (self.name, index, stype, '\n'.join(paths))) + # raise RuntimeError + # return found + # if 'n_ic' in self.Data.ix[index]: + # n_ic = self.Data.ix[index]['n_ic'] + # else: + # n_ic = 1 + # for i in range(n_ic): + # fpath = find_ic(i) + + raw_input() + + # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + # print toplevel + # raw_input() + # return + + # def narrow(): + # # Get the names of simulations that are REQUIRED to calculate the observables. 
+ # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) + # # Whoa, this is a deeply nested loop. What does it do? + # # First loop over the elements in "sreqs" for each observable name. + # # If the element is a string, then it's a required simulation name (top level). + # # If the element is a list, then it's a list of valid simulation names + # # and we need to narrow the list down. + # # For the ones that are lists (and have any intersection with the top level), + # # delete the ones that don't intersect. + # sreq0 = copy.deepcopy(sreqs) + # for obsname in sreqs: + # for sims in sreqs[obsname]: + # if type(sims) == list: + # if len(sims) == 1: + # sreqs[obsname] = [sims[0]] + # elif any([i in sims for i in toplevel]): + # for j in sims: + # if j not in toplevel: sims.remove(j) + # return sreqs != sreq0 + + # print sreqs + # while narrow(): + # print sreqs # For the leftover observables where there is still some ambiguity, # we attempt # To do: Figure this out from existing initial conditions maybe - for obsname in sreqs: - for sims in sreqs[obsname]: - if type(sims) == list: - for sim in sims: - if has_ic(sim): - sreqs[obsname] = [sim] + # for obsname in sreqs: + # for sims in sreqs[obsname]: + # if type(sims) == list: + # for sim in sims: + # if has_ic(sim): + # sreqs[obsname] = [sim] - self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + # self.Simulations = OrderedDict([(i, []) for i in self.Indices]) return @@ -633,7 +810,7 @@ def prepare_simulations(self): # print narrow() # The list of simulations that we'll be running. - self.Simulations = OrderedDict([(i, []) for i in self.Ensembles]) + self.Simulations = OrderedDict([(i, []) for i in self.Indices]) return @@ -1070,4 +1247,38 @@ def __str__(self): return "\n".join(msg) +class Simulation(object): + """ + Data container for a simulation (specified by index, simulation + type, initial condition). 
+ """ + + def __init__(self, index, stype, initial, iframe, tsnames): + self.index = index + self.stype = stype + self.initial = initial + self.iframe = iframe + self.timeseries = OrderedDict([(i, []) for i in tsnames]) + + def __str__(self): + msg = [] + if self.temperature is None: + msg.append("State: Unknown.") + elif self.pressure is None: + msg.append("State: Point " + str(self.idnr) + " at " + + str(self.temperature) + " K.") + else: + msg.append("State: Point " + str(self.idnr) + " at " + + str(self.temperature) + " K and " + + str(self.pressure) + " bar.") + + msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) + for key in self.ref: + msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) + + msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) + for key in self.data: + msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) + + return "\n".join(msg) diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 8bfd6281f..275265680 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -66,7 +66,7 @@ $target name LiquidBromine type Thermo_GMX weight 1.0 -expdata_txt expset.txt +source expset.txt quantities density h_vap n_sim_chain 2 md_steps 100000 diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim2.gro b/studies/004_thermo/targets/LiquidBromine/1/gas.gro similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim2.gro rename to studies/004_thermo/targets/LiquidBromine/1/gas.gro diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim2.mdp b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim2.mdp rename to studies/004_thermo/targets/LiquidBromine/1/gas.mdp diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim2.top b/studies/004_thermo/targets/LiquidBromine/1/gas.top similarity index 100% rename from 
studies/004_thermo/targets/LiquidBromine/1/sim2.top rename to studies/004_thermo/targets/LiquidBromine/1/gas.top diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim1.gro b/studies/004_thermo/targets/LiquidBromine/1/liquid.gro similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim1.gro rename to studies/004_thermo/targets/LiquidBromine/1/liquid.gro diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim1.mdp b/studies/004_thermo/targets/LiquidBromine/1/liquid.mdp similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim1.mdp rename to studies/004_thermo/targets/LiquidBromine/1/liquid.mdp diff --git a/studies/004_thermo/targets/LiquidBromine/1/sim1.top b/studies/004_thermo/targets/LiquidBromine/1/liquid.top similarity index 100% rename from studies/004_thermo/targets/LiquidBromine/1/sim1.top rename to studies/004_thermo/targets/LiquidBromine/1/liquid.top diff --git a/studies/004_thermo/targets/LiquidBromine/expset.txt b/studies/004_thermo/targets/LiquidBromine/expset.txt index 0bfd3a7dc..766c88b32 100644 --- a/studies/004_thermo/targets/LiquidBromine/expset.txt +++ b/studies/004_thermo/targets/LiquidBromine/expset.txt @@ -1,7 +1,7 @@ # Experimental data for liquid bromine. 
Index Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w - 0 298.15 1.01325 3102.8 1.0 29.96 1.0 + 1 298.15 1.01325 3102.8 1.0 29.96 1.0 # Variables: Denominators and weights for quantities Denoms = 30 0.3 From afd84859ad8172ff31e1e5a23e50932bab3ebd36 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 8 Apr 2014 03:30:43 -0700 Subject: [PATCH 10/25] Observable and simulation setup should be working correctly now --- src/nifty.py | 6 + src/observable.py | 151 ++++++++++---- src/parser.py | 2 +- src/thermo.py | 348 +++++++++---------------------- studies/004_thermo/single.in | 2 +- studies/004_thermo/test_parse.in | 16 +- 6 files changed, 215 insertions(+), 310 deletions(-) diff --git a/src/nifty.py b/src/nifty.py index 3fcc29d3b..b188934f9 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -237,6 +237,12 @@ def magic_string(str): #===============================# #| Math: Variable manipulation |# #===============================# +def isnan(var): + """ Attempt to see if the given variable is np.nan. """ + if isinstance(var, float): + return np.isnan(var) + return False + def isint(word): """ONLY matches integers! If you have a decimal point? None shall pass! diff --git a/src/observable.py b/src/observable.py index a1139105a..c0eea075f 100644 --- a/src/observable.py +++ b/src/observable.py @@ -88,14 +88,15 @@ class Observable(object): "treq" - that are needed to *differentiate* this observable. (Usually energy derivatives) """ - def __init__(self, source, name=None): - self.name = name if name is not None else "empty" - self.sreq = [] - self.treq = [] - self.dreq = ['energy_derivatives'] + + def __init__(self, source): + # Reference data which can be useful in calculating the observable. + self.Data = source[self.columns] + # Required time series for the gradient (defaults to energy derivatives). 
+ self.grad_requires = OrderedDict([(simulation, 'energy_derivatives') for simulation in self.requires.keys()]) def __str__(self): - return "observable is " + self.name.capitalize() + "." + return "Observable = " + self.name.capitalize() + "; Columns = " + ', '.join(self.columns) def extract(self, engines, FF, mvals, h, AGrad=True): """Calculate and extract the observable from MD results. How this is done @@ -130,17 +131,25 @@ def extract(self, engines, FF, mvals, h, AGrad=True): # class Observable_Density class Observable_Density(Observable): - def __init__(self, source, name=None): - """ Density. """ - super(Observable_Density, self).__init__(source, name) - - self.name = name if name is not None else "density" - # Calculating the density requires either a liquid or solid simulation. - self.sreq = [['liquid', 'solid']] + """ + The Observable_Density class implements common methods for + extracting the density from a simulation, but does not specify the + simulation itself ('requires' attribute). Don't create a + Density object directly, use the Liquid_Density and Solid_Density + derived classes. + + This is due to our overall framework that each observable must + have a unique list of required simulations, yet the formula for + calculating the density and its derivative is always the same. + """ - # Requires timeseries of densities from the simulation. - self.treq = ['density'] + def __init__(self, source): + # Name of the observable. + self.name = 'density' + # Columns that are taken from the data table. + self.columns = ['density'] + super(Observable_Density, self).__init__(source) def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -201,22 +210,32 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): # Analytic first derivative. 
Rho_grad = mBeta * (flat(np.mat(G) * col(Density)) / len(Density) \ - np.mean(Density) * np.mean(G, axis=1)) - return Rho_avg, Rho_err, Rho_grad +class Liquid_Density(Observable_Density): + def __init__(self, source): + # The density time series is required from the simulation. + self.requires = OrderedDict([('liquid', ['density'])]) + super(Liquid_Density, self).__init__(source) + +class Solid_Density(Observable_Density): + def __init__(self, source): + # The density time series is required from the simulation. + self.requires = OrderedDict([('solid', ['density'])]) + super(Solid_Density, self).__init__(source) + # class Observable_H_vap class Observable_H_vap(Observable): - def __init__(self, source, name=None): + def __init__(self, source): """ Enthalpy of vaporization. """ - super(Observable_H_vap, self).__init__(source, name) - - self.name = name if name is not None else "H_vap" - - # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. - self.sreq = [['liquid'], ['gas']] - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] + # Name of the observable. + self.name = 'hvap' + # Columns that are taken from the data table. + self.columns = ['hvap'] + # Get energy/volume from liquid simulation, and energy from gas simulation. + self.requires = OrderedDict([('liquid', ['energy', 'volume']), ('gas', ['energy'])]) + # Initialize the base class + super(Observable_H_vap, self).__init__(source) def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -315,23 +334,63 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Hvap_avg, Hvap_err, Hvap_grad -# class Observable_Kappa -class Observable_Kappa(Observable): - def __init__(self, source, name=None): - """ Compressibility (applies to liquid and lipid bilayer.) 
""" - super(Observable_H_vap, self).__init__(source, name) - - self.name = name if name is not None else "H_vap" - - # List of dictionaries of simulations, keyed to timeseries to extract from the simulation. - # Each dictionary represents a simulation in a sequence, but the observable isn't mapped to a unique simulation each time. - # Because of this, we determine which simulation to launch (in the sequence) based on the available initial coordinates (or explicit user input). - # Depending on which simulation is executed, we require different timeseries from the simulation, and different formulas. - # But another way is to just define two observables ... need to think about it. - - self.sreq = [{'liquid':['volume'], 'bilayer':['al']}, - ] - - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] +# class Observable_Al +class Observable_Al(Observable): + def __init__(self, source): + """ Area per lipid. """ + # Name of the observable. + self.name = 'al' + # Columns that are taken from the data table. + self.columns = ['al'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['al'])]) + # Initialize the base class + super(Observable_Al, self).__init__(source) + +# class Observable_Scd +class Observable_Scd(Observable): + def __init__(self, source): + """ Deuterium order parameter. """ + # Name of the observable. + self.name = 'scd' + # Columns that are taken from the data table. + self.columns = ['scd1_idx', 'scd1', 'scd2_idx', 'scd2'] + # Get deuterium order parameter from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['scd1', 'scd2'])]) + # Initialize the base class + super(Observable_Scd, self).__init__(source) + +# class Lipid_Kappa +class Lipid_Kappa(Observable): + def __init__(self, source): + """ Compressibility as calculated for lipid bilayers. """ + # Name of the observable. 
+ self.name = 'kappa' + # Columns that are taken from the data table. + self.columns = ['kappa'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['al'])]) + # Initialize the base class + super(Lipid_Kappa, self).__init__(source) + +# class Liquid_Kappa +class Liquid_Kappa(Observable): + def __init__(self, source): + """ Compressibility as calculated for liquids. """ + # Name of the observable. + self.name = 'kappa' + # Columns that are taken from the data table. + self.columns = ['kappa'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('liquid', ['volume'])]) + # Initialize the base class + super(Liquid_Kappa, self).__init__(source) + +## A mapping that takes us from observable names to possible Observable objects. +OMap = {'density' : [Liquid_Density, Solid_Density], + 'rho' : [Liquid_Density, Solid_Density], + 'hvap' : [Observable_H_vap], + 'h_vap' : [Observable_H_vap], + 'al' : [Observable_Al], + 'kappa' : [Liquid_Kappa, Lipid_Kappa], + 'scd' : [Observable_Scd]} diff --git a/src/parser.py b/src/parser.py index 340b45723..fb3d26e58 100644 --- a/src/parser.py +++ b/src/parser.py @@ -158,6 +158,7 @@ }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), "observables" : ([], 100, 'List of observables to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "simulations" : ([], 100, 'Simulations used to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -174,7 +175,6 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 
0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), diff --git a/src/thermo.py b/src/thermo.py index 93e50e866..4cf5661a8 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -8,10 +8,11 @@ import itertools import cStringIO -from forcebalance.observable import * +from forcebalance.molecule import Molecule +from forcebalance.observable import OMap from forcebalance.target import Target from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row, isint +from forcebalance.nifty import flat, col, row, isint, isnan from forcebalance.nifty import lp_dump, lp_load, wopen, _exec from forcebalance.nifty import LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary @@ -363,9 +364,9 @@ def __init__(self, options, tgt_opts, forcefield): # Source data (experimental data, model parameters and weights) self.set_option(tgt_opts, "source", forceprint=True) # Observables to calculate - self.set_option(tgt_opts, "observables", "observable_names", forceprint=True) + self.set_option(tgt_opts, "observables", "onames", forceprint=True) # Length of simulation chain - self.set_option(tgt_opts, "n_sim_chain", forceprint=True) + self.set_option(tgt_opts, "simulations", forceprint=True) # Number of time 
steps in the equilibration run self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run @@ -385,23 +386,20 @@ def __init__(self, options, tgt_opts, forcefield): 'tinker':['.xyz', '.arc'], 'openmm':['.pdb']}[self.engname.lower()] - ## A mapping that takes us from observable names to Observable objects. - self.Observable_Map = {'density' : Observable_Density, - 'rho' : Observable_Density, - 'hvap' : Observable_H_vap, - 'h_vap' : Observable_H_vap} - - - ## Read source data and initialize points; creates self.Data, self.Indices and self.Observables objects. + ## Read source data and initialize points; creates self.Data, self.Indices and self.Columns objects. self.read_source(os.path.join(self.root, self.tgtdir, self.source)) + + ## Set up self.Observables. + self.initialize_observables() + + ## Set up self.Simulations. + self.initialize_simulations() ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - ## Set up simulations - #self.determine_simulations() def read_source(self, srcfnm): """Read and store source data. 
@@ -421,8 +419,8 @@ def read_source(self, srcfnm): source = parse1(srcfnm) printcool_dictionary(source.metadata, title="Metadata") revhead = [] - obs = '' - obsnames = [] + col = '' + colnames = [] units = defaultdict(str) @@ -430,8 +428,8 @@ def read_source(self, srcfnm): if i == 0 and head.lower() == 'index': # Treat special case because index can also mean other things revhead.append('index') continue - newh, punit, obs = stand_head(head, obs) - if obs not in obsnames + ['temp', 'pres', 'n_ic']: obsnames.append(obs) + newh, punit, col = stand_head(head, col) + if col not in colnames + ['temp', 'pres', 'n_ic']: colnames.append(col) revhead.append(newh) if punit != '': units[newh] = punit @@ -481,7 +479,7 @@ def read_source(self, srcfnm): # (2) There can only be one file per system index / column. # (3) The column heading in the secondary file that's being # referenced must match that of the reference in the primary file. - obs2 = '' + col2 = '' for cid_, fld in enumerate(row[1:]): if ':' not in fld: continue cid = cid_ + 1 @@ -504,7 +502,7 @@ def reffld_error(reason=''): reffld_error('%s already contains a file reference' % (saveidx, revhead[cid])) subfile = parse1(fpath) fcol = fcol_ - 1 - head2, punit2, obs2 = stand_head(subfile.heading[fcol], obs2) + head2, punit2, col2 = stand_head(subfile.heading[fcol], col2) if revhead[cid] != head2: reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] @@ -533,6 +531,18 @@ def reffld_error(reason=''): # Turn it into a pandas DataFrame. 
self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) + def intcol(col): + if col in self.Data.columns: + for idx in self.Data.index: + if not isnan(self.Data[col][idx]): + self.Data[col][idx] = int(self.Data[col][idx]) + + def floatcol(col): + if col in self.Data.columns: + self.Data[col] = self.Data[col].astype(float) + + intcol('n_ic') + # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. self.Indices = [] @@ -540,42 +550,9 @@ def reffld_error(reason=''): if idx[0] not in self.Indices: self.Indices.append(idx[0]) - # A list of Observable objects (i.e. column headings) which - # contain methods for calculating observables that we need. - # Think about: - # (1) How much variability is allowed across Indices? - # For instance, different S_cd is permissible. - self.Observables = OrderedDict() - for obsname in [stand_head(i, '')[2] for i in self.observable_names]: - if obsname in self.Observables: - logger.error('%s was already specified as an observable' % (obsname)) - self.Observables[obsname] = OrderedDict() - for ie, index in enumerate(self.Indices): - if obsname in self.Observable_Map: - newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) - logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - self.Observables[obsname][index] = newobs - else: - logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - self.Observables[obsname][index] = Observable(name=obsname, source=self.Data.ix[index]) - - print self.Indices - print self.Observables - print repr(self.Data) - raw_input() - # for index in self.Indices: - # self.Observables[index] = [] - # for obsname in obsnames: - # for index, ie in enumerate(self.Indices): - # if obsname in self.Observable_Map: - # newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) 
- # if newobs.name in [obs.name for obs in self.Observables[index]]: - # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) - # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - # self.Observables[index].append(newobs) - # else: - # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - # self.Observables[index].append(Observable(name=obsname, source=self.Data.ix[index])) + # List of columns in the main data table. + self.Columns = self.Data.columns + return def find_ic(self, index, stype, icn): @@ -623,195 +600,75 @@ def find_ic(self, index, stype, icn): logger.info('Target %s Index %s Simulation %s : ' 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError + + if found == '': + logger.error('Could not find initial coordinate file for simulation type %s' % stype) + raise RuntimeError + return found, 0 if numbered else icn - def determine_simulations(self): - + def initialize_observables(self): """ - Determine which simulations need to be run. The same - simulations are run for each index in the data set. + Determine Observable objects to be created. Checks to see + whether simulations are consistent with observables (i.e. no + missing simulations or ambiguities.) + In order to implement a new observable, create a class in + observable.py and add it to OMap. 
+ """ + self.Observables = OrderedDict() + for oname in [stand_head(i, '')[2] for i in self.onames]: + if oname in self.Observables: + logger.error('%s was already specified as an observable' % (oname)) + raise RuntimeError + self.Observables[oname] = OrderedDict() + for index in self.Indices: + if oname in OMap: + Objs = [] + Reqs = [] + for OClass in OMap[oname]: + OObj = OClass(self.Data) + Reqs.append(OObj.requires.keys()) + if all([i in self.simulations for i in OObj.requires.keys()]): + Objs.append(OObj) + if len(Objs) == 0: + logger.error('Observable %s is specified but required simulations are missing; choose %s' % (oname, ' or '.join([str(r) for r in Reqs]))) + raise RuntimeError + if len(Objs) > 1: + logger.error("Observable %s not uniquely mapped to simulations (choose between %s)" % (oname, ' or '.join([o.name in Objs]))) + raise RuntimeError + logger.info("Creating %s observable object for index %s\n" % (Objs[0].name, index)) + self.Observables[oname][index] = Objs[0] + else: + logger.error('%s is specified but there is no corresponding Observable class\n' % oname) + raise RuntimeError + return + + def initialize_simulations(self): + """ + Determine simulations to be run. The same simulations are + run for each index in the data set. + Note that there may be a different number of initial conditions (i.e. parallel runs) for different indices. """ - # Determine which simulations are needed. - sreqs = OrderedDict() - for obsname in self.Observables: - sreqs[obsname] = self.Observables[obsname][self.Indices[0]].sreq - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - self.Simulations = OrderedDict([(i, OrderedDict()) for i in self.Indices]) - tsnames = [] - for obsname in self.Observables: - treqs = self.Observables[obsname][self.Indices[0]].treq - for treq in treqs: - if treq not in tsnames: - tsnames.append(treq) - for index in self.Indices: - # Loop over observable names. 
Here we determine whether - # the initial coordinates are missing (bad), unique (good) or ambiguous (bad). if 'n_ic' in self.Data.ix[index]: - n_ic = self.Data.ix[index]['n_ic'] + nics = [i for i in self.Data.ix[index]['n_ic'] if not isnan(i)] + if len(nics) != 1: + logger.error("Expected 1 number for n_ic but got %i" % len(nics)) + raise RuntimeError + n_ic = nics[0] else: n_ic = 1 - for obsname in sreqs: - for stypes in sreqs[obsname]: - if isinstance(stypes, str): - stypes = [stypes] - for icn in range(n_ic): - icfiles = [] - svalid = [] - for stype in stypes: - fpath, iframe = self.find_ic(index, stype, icn) - if fpath != '': - icfiles.append(fpath) - svalid.append(stype) - if len(icfiles) == 0: - logger.error('Target %s Index %s Simulation %s : ' - 'could not find initial coordinate file\n' - % (self.name, index, stype)) - raise RuntimeError - elif len(icfiles) > 1: - logger.error('Target %s Index %s Simulation %s : ' - 'ambiguous initial coordinate files (%s)' - % (self.name, index, stype, ' '.join(icfiles))) - self.Simulations[index][svalid[0]] = Simulation(index, svalid[0], icfiles[0], iframe, tsnames) - - print self.Simulations - print tsnames - # raw_input() - # if isinstance(sreqs[obsname], str): - # stypes = [sreqs[obsname]] - # for stype in stypes: - # print index, stype - - # for stype in toplevel: - # for index in self.Indices: - # def find_ic(icn): - # found = '' - # # Initial condition files will be searched for in the following priority: - # # targets/target_name/index/stype/ICs/stype_#.xyz - # # targets/target_name/index/stype/ICs/stype#.xyz - # # targets/target_name/index/stype/ICs/#.xyz - # # targets/target_name/index/stype/ICs/stype.xyz - # # targets/target_name/index/stype/ICs/coords.xyz - # # targets/target_name/index/stype/stype.xyz - # # targets/target_name/index/stype/coords.xyz - # # targets/target_name/index/stype.xyz - # # targets/target_name/stype.xyz - # basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), - 
# (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', stype), False), - # (os.path.join(index, stype, 'ICs', 'coords'), False), - # (os.path.join(index, stype, stype), False), - # (os.path.join(index, stype, 'coords'), False), - # (os.path.join(index, stype), False), - # (os.path.join(stype), False)] - # paths = [] - # for fnm, numbered in basefnms: - # for crdsfx in self.crdsfx: - # fpath = os.path.join(self.tgtdir, fnm+crdsfx) - # paths.append(fpath) - # if os.path.exists(fpath): - # if found != '': - # logger.info('Target %s Index %s Simulation %s : ' - # '%s overrides %s\n' % (self.name, index, stype, fpath)) - # else: - # if not numbered: - # M = Molecule(fpath) - # if len(M) <= icn: - # logger.error("Target %s Index %s Simulation %s : " - # "initial coordinate file %s doesn't have enough structures\n" % - # (self.name, index, stype, fpath)) - # raise RuntimeError - # logger.info('Target %s Index %s Simulation %s : ' - # 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) - # found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError - # return found - # if 'n_ic' in self.Data.ix[index]: - # n_ic = self.Data.ix[index]['n_ic'] - # else: - # n_ic = 1 - # for i in range(n_ic): - # fpath = find_ic(i) - - raw_input() - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # print toplevel - # raw_input() - # return - - # def narrow(): - # # Get the names of simulations that are REQUIRED to calculate the observables. - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # # Whoa, this is a deeply nested loop. What does it do? 
- # # First loop over the elements in "sreqs" for each observable name. - # # If the element is a string, then it's a required simulation name (top level). - # # If the element is a list, then it's a list of valid simulation names - # # and we need to narrow the list down. - # # For the ones that are lists (and have any intersection with the top level), - # # delete the ones that don't intersect. - # sreq0 = copy.deepcopy(sreqs) - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # if len(sims) == 1: - # sreqs[obsname] = [sims[0]] - # elif any([i in sims for i in toplevel]): - # for j in sims: - # if j not in toplevel: sims.remove(j) - # return sreqs != sreq0 - - # print sreqs - # while narrow(): - # print sreqs - # For the leftover observables where there is still some ambiguity, - # we attempt - # To do: Figure this out from existing initial conditions maybe - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # for sim in sims: - # if has_ic(sim): - # sreqs[obsname] = [sim] - - - # self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - - return - - def prepare_simulations(self): - - """ - - Prepare simulations to be launched. Set initial conditions - and create directories. This function is intended to be run - at the start of each optimization cycle, so that initial - conditions may be easily set. - - """ - # print narrow() - - # The list of simulations that we'll be running. - self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - + for s in self.simulations: + for icn in range(n_ic): + fpath, iframe = self.find_ic(index, s, icn) + self.Simulations[index][s] = Simulation(index, s, fpath, iframe) + print index, s, str(self.Simulations[index][s]) + # print self.Simulations return def launch_simulation(self, index, simname): @@ -1254,31 +1111,14 @@ class Simulation(object): type, initial condition). 
""" - def __init__(self, index, stype, initial, iframe, tsnames): + def __init__(self, index, stype, initial, iframe): self.index = index - self.stype = stype + self.type = stype self.initial = initial self.iframe = iframe - self.timeseries = OrderedDict([(i, []) for i in tsnames]) def __str__(self): msg = [] - if self.temperature is None: - msg.append("State: Unknown.") - elif self.pressure is None: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K.") - else: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K and " + - str(self.pressure) + " bar.") - - msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) - for key in self.ref: - msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) - - msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) - for key in self.data: - msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) - - return "\n".join(msg) + msg.append("Simulation: Index %s Type %s" % (self.index, self.type)) + msg.append("Initial conditions: File %s Frame %i" % (self.initial, self.iframe)) + return '\n'.join(msg) diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 275265680..9fb288210 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -68,7 +68,7 @@ type Thermo_GMX weight 1.0 source expset.txt quantities density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 $end diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in index 4eb82ab64..89edb69a7 100644 --- a/studies/004_thermo/test_parse.in +++ b/studies/004_thermo/test_parse.in @@ -72,7 +72,7 @@ type Thermo_GMX weight 1.0 source expset.txt observables density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 $end @@ -83,7 +83,7 @@ type Thermo_GMX weight 1.0 source data.csv observables density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 
100000 eq_steps 50000 $end @@ -94,7 +94,7 @@ type Thermo_GMX weight 1.0 source data.tab.txt observables density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 $end @@ -105,7 +105,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -116,7 +116,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -127,7 +127,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -138,7 +138,7 @@ type Thermo_GMX weight 1.0 source lipidcol2a.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end @@ -149,7 +149,7 @@ type Thermo_GMX weight 1.0 source lipidcol1.txt observables al scd kappa -n_sim_chain 2 +simulations bilayer md_steps 100000 eq_steps 50000 $end From efdb8427ba222391ed828384e297d232c3620754 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 8 Apr 2014 03:32:39 -0700 Subject: [PATCH 11/25] Clean up --- src/thermo.py | 75 --------------------------------------------------- 1 file changed, 75 deletions(-) diff --git a/src/thermo.py b/src/thermo.py index 4cf5661a8..9d57010ae 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -697,81 +697,6 @@ def launch_simulation(self, index, simname): input_files = self.nptfiles + self.scripts + ['forcebalance.p'], output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) - # NAMES FOR OBJECTS! - - # Timeseries: Time series of an instantaneous observable that is - # returned by the MD simulation. - - # Observable: A thermodynamic property which can be compared to - # experiment and possesses methods for calculating the property - # and its derivatives. - - # State? Point? What should this be called?? 
- - # # print revhead[1:] - # # for rn, row in enumerate(drows): - # # print index[rn], row - - # # print repr(self.Data) - - # # # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) - - - # # # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) - # # # print self.Data.__repr__ - # # # raw_input() - - # # return - - # fp = open(expdata) - - # line = fp.readline() - # foundHeader = False - # names = None - # units = None - # label_header = None - # label_unit = None - # count = 0 - # metadata = {} - # while line: - # # Skip comments and blank lines - # if line.lstrip().startswith("#") or not line.strip(): - # line = fp.readline() - # continue - # # Metadata is denoted using - # if "=" in line: # Read variable - # param, value = line.split("=") - # param = param.strip().lower() - # metadata[param] = value - # # if param == "denoms": - # # for e, v in enumerate(value.split()): - # # self.denoms[self.observables[e]] = float(v) - # # elif param == "weights": - # # for e, v in enumerate(value.split()): - # # self.weights[self.observables[e]] = float(v) - # elif foundHeader: # Read exp data - # count += 1 - # vals = line.split() - # label = (vals[0], label_header, label_unit) - # refs = np.array(vals[1:-2:2]).astype(float) - # wts = np.array(vals[2:-2:2]).astype(float) - # temperature = float(vals[-2]) - # pressure = None if vals[-1].lower() == "none" else \ - # float(vals[-1]) - # dp = Point(count, label=label, refs=refs, weights=wts, - # names=names, units=units, - # temperature=temperature, pressure=pressure) - # self.points.append(dp) - # else: # Read headers - # foundHeader = True - # headers = zip(*[tuple(h.split("_")) for h in line.split() - # if h != "w"]) - # label_header = list(headers[0])[0] - # label_unit = list(headers[1])[0] - # names = list(headers[0][1:-2]) - # units = list(headers[1][1:-2]) - # line = fp.readline() - def 
retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated observables in the Point object dp. From 132e70a7ead9ae5fba414d6a750b73eb012f589e Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Wed, 16 Apr 2014 14:35:08 -0700 Subject: [PATCH 12/25] Target now knows which simulations to launch. Next task: Pass in auxiliary files (.top, .mdp) and run the simulation. --- src/forcefield.py | 30 +- src/gmxio.py | 5 +- src/nifty.py | 12 + src/observable.py | 33 +- src/parser.py | 3 +- src/thermo.py | 469 ++++++------------ studies/004_thermo/single.in | 12 +- .../targets/Lipid_TAB/lipidcol1.txt | 4 +- 8 files changed, 219 insertions(+), 349 deletions(-) diff --git a/src/forcefield.py b/src/forcefield.py index 7db442a39..9aef243ff 100644 --- a/src/forcefield.py +++ b/src/forcefield.py @@ -193,7 +193,7 @@ class FF(forcebalance.BaseClass): For details on force field parsing, see the detailed documentation for addff. """ - def __init__(self, options, verbose=True): + def __init__(self, options, verbose=True, printopt=True): """Instantiation of force field class. @@ -227,6 +227,8 @@ def __init__(self, options, verbose=True): self.set_option(options, 'rigid_water') ## Bypass the transformation and use physical parameters directly self.set_option(options, 'use_pvals') + ## Allow duplicate parameter names (internally construct unique names) + self.set_option(options, 'duplicate_pnames') #======================================# # Variables which are set here # @@ -318,7 +320,12 @@ def __init__(self, options, verbose=True): self.linedestroy_this = [] self.prmdestroy_this = [] ## Print the optimizer options. 
- printcool_dictionary(self.PrintOptionDict, title="Setup for force field") + if printopt: printcool_dictionary(self.PrintOptionDict, title="Setup for force field") + + @classmethod + def fromfile(cls, fnm): + options = {'forcefield' : [fnm], 'ffdir' : '.', 'duplicate_pnames' : True} + return cls(options, verbose=False, printopt=False) def addff(self,ffname): """ Parse a force field file and add it to the class. @@ -496,15 +503,32 @@ def addff_txt(self, ffname, fftype): # For each of the fields that are to be parameterized (indicated by PRM #), # assign a parameter type to it according to the Interaction Type -> Parameter Dictionary. pid = self.Readers[ffname].build_pid(pfld) + pid_ = pid # Add pid into the dictionary. # LPW: Here is a hack to allow duplicate parameter IDs. if pid in self.map: pid0 = pid extranum = 0 + dupfnms = [os.path.basename(i[0]) for i in self.pfields[self.map[pid]]] + duplns = [i[1] for i in self.pfields[self.map[pid]]] + dupflds = [i[2] for i in self.pfields[self.map[pid]]] while pid in self.map: pid = "%s%i" % (pid0, extranum) extranum += 1 - logger.info("Encountered an duplicate parameter ID: parameter name has been changed to %s\n" % pid) + def warn_or_err(*args): + if self.duplicate_pnames: + logger.warn(*args) + else: + logger.error(*args) + warn_or_err("Encountered an duplicate parameter ID (%s)\n" % pid_) + warn_or_err("file %s line %i field %i duplicates:\n" + % (os.path.basename(ffname), ln+1, pfld)) + for dupfnm, dupln, dupfld in zip(dupfnms, duplns, dupflds): + warn_or_err("file %s line %i field %i\n" % (dupfnm, dupln+1, dupfld)) + if self.duplicate_pnames: + logger.warn("Parameter name has been changed to %s\n" % pid) + else: + raise RuntimeError self.map[pid] = self.np # This parameter ID has these atoms involved. 
self.patoms.append([self.Readers[ffname].molatom]) diff --git a/src/gmxio.py b/src/gmxio.py index 273438ada..bcd0525a8 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1491,12 +1491,15 @@ def __init__(self,options,tgt_opts,forcefield): self.set_option(options,'gmxpath') # Suffix for GROMACS executables. self.set_option(options,'gmxsuffix') + # Engine for calculating things locally (e.g. polarization correction) self.engine_ = GMX # Name of the engine to pass to scripts. self.engname = "gromacs" + # Valid coordinate suffix. + self.crdsfx = ['.gro', '.pdb'] # Command prefix. self.mdpfx = "bash gmxprefix.bash" # Scripts to be copied from the ForceBalance installation directory. - self.scripts = ['gmxprefix.bash', 'md_chain.py'] + self.scripts = ['gmxprefix.bash', 'md_one.py'] ## Initialize base class. super(Thermo_GMX,self).__init__(options,tgt_opts,forcefield) diff --git a/src/nifty.py b/src/nifty.py index 3fcc29d3b..01f005c1d 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -279,6 +279,18 @@ def floatornan(word): logger.info("Setting %s to % .1e\n" % big) return big +def isnpnan(var): + """ + + Determine whether a variable is np.nan. I wrote this function + because np.isnan would crash if we use it on a dtype that is not + np.float + + """ + if type(var) in [np.float, np.float32, np.float64, np.double]: + return np.isnan(var) + return False + def col(vec): """ Given any list, array, or matrix, return a 1-column matrix. 
diff --git a/src/observable.py b/src/observable.py index a1139105a..f0fa790da 100644 --- a/src/observable.py +++ b/src/observable.py @@ -91,7 +91,6 @@ class Observable(object): def __init__(self, source, name=None): self.name = name if name is not None else "empty" self.sreq = [] - self.treq = [] self.dreq = ['energy_derivatives'] def __str__(self): @@ -135,12 +134,7 @@ def __init__(self, source, name=None): super(Observable_Density, self).__init__(source, name) self.name = name if name is not None else "density" - - # Calculating the density requires either a liquid or solid simulation. - self.sreq = [['liquid', 'solid']] - - # Requires timeseries of densities from the simulation. - self.treq = ['density'] + self.sreq = [{'liquid':['density'], 'solid':['density']}] def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -213,10 +207,8 @@ def __init__(self, source, name=None): self.name = name if name is not None else "H_vap" # Calculating the heat of vaporization requires a liquid simulation and a gas simulation. - self.sreq = [['liquid'], ['gas']] - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] + self.sreq = [{'liquid':['energy', 'volume']}, + {'gas':['energy']}] def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -320,18 +312,11 @@ class Observable_Kappa(Observable): def __init__(self, source, name=None): """ Compressibility (applies to liquid and lipid bilayer.) """ super(Observable_H_vap, self).__init__(source, name) - self.name = name if name is not None else "H_vap" + # List of dictionaries of simulation names : timeseries that + # we extract from the simulation. - # List of dictionaries of simulations, keyed to timeseries to extract from the simulation. - # Each dictionary represents a simulation in a sequence, but the observable isn't mapped to a unique simulation each time. 
- # Because of this, we determine which simulation to launch (in the sequence) based on the available initial coordinates (or explicit user input). - # Depending on which simulation is executed, we require different timeseries from the simulation, and different formulas. - # But another way is to just define two observables ... need to think about it. - - self.sreq = [{'liquid':['volume'], 'bilayer':['al']}, - ] - - - # Requires timeseries of energies and volumes from the simulation. - self.treq = [['energy', 'volume'], ['energy']] + # Each dictionary represents a simulation needed to calculate + # the observable, but the required timeseries depends on the + # simulation that we run. + self.sreq = [{'liquid':['volume'], 'bilayer':['al']}], diff --git a/src/parser.py b/src/parser.py index 340b45723..b21a39214 100644 --- a/src/parser.py +++ b/src/parser.py @@ -102,6 +102,7 @@ "asynchronous" : (0, 0, 'Execute Work Queue tasks and local calculations asynchronously for improved speed', 'Targets that use Work Queue (advanced usage)'), "reevaluate" : (None, 0, 'Re-evaluate the objective function and gradients when the step is rejected (for noisy objective functions).', 'Main Optimizer'), "continue" : (0, 140, 'Continue the current run from where we left off (supports mid-iteration recovery).', 'Main Optimizer'), + "duplicate_pnames" : (0, -150, 'Allow duplicate parameter names (only if you know what you are doing!', 'Force Field Parser'), }, 'floats' : {"trust0" : (1e-1, 100, 'Levenberg-Marquardt trust radius; set to negative for nonlinear search', 'Main Optimizer'), "mintrust" : (0.0, 10, 'Minimum trust radius (if the trust radius is tiny, then noisy optimizations become really gnarly)', 'Main Optimizer'), @@ -158,6 +159,7 @@ }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), "observables" : ([], 100, 'List of observables to be fitted, each 
must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "simulations" : ([], 100, 'List of simulations to be run (in order to calculate fitting observables)', 'Thermodynamic properties target', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -174,7 +176,6 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate observables.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), diff --git a/src/thermo.py b/src/thermo.py index 93e50e866..627b5a7e2 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -11,10 +11,11 @@ from forcebalance.observable import * from forcebalance.target import Target from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row, isint +from forcebalance.nifty import flat, col, row, isint, isnpnan from forcebalance.nifty import lp_dump, lp_load, wopen, _exec -from forcebalance.nifty import LinkFile, link_dir_contents +from forcebalance.nifty import GoInto, LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary +from forcebalance.nifty 
import getWorkQueue from collections import defaultdict, OrderedDict @@ -363,9 +364,9 @@ def __init__(self, options, tgt_opts, forcefield): # Source data (experimental data, model parameters and weights) self.set_option(tgt_opts, "source", forceprint=True) # Observables to calculate - self.set_option(tgt_opts, "observables", "observable_names", forceprint=True) + self.set_option(tgt_opts, "observables", "user_observable_names", forceprint=True) # Length of simulation chain - self.set_option(tgt_opts, "n_sim_chain", forceprint=True) + self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run @@ -380,10 +381,6 @@ def __init__(self, options, tgt_opts, forcefield): self.denoms = {} # Weights for observables self.weights = {} - # Suffixes for coordinate files - self.crdsfx = {'gromacs':['.gro', '.pdb'], - 'tinker':['.xyz', '.arc'], - 'openmm':['.pdb']}[self.engname.lower()] ## A mapping that takes us from observable names to Observable objects. self.Observable_Map = {'density' : Observable_Density, @@ -401,7 +398,7 @@ def __init__(self, options, tgt_opts, forcefield): os.path.join(self.root, self.tempdir, f)) ## Set up simulations - #self.determine_simulations() + self.prepare_simulations() def read_source(self, srcfnm): """Read and store source data. @@ -533,6 +530,9 @@ def reffld_error(reason=''): # Turn it into a pandas DataFrame. self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) + # self.collapse = + # self. + # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. self.Indices = [] @@ -540,17 +540,36 @@ def reffld_error(reason=''): if idx[0] not in self.Indices: self.Indices.append(idx[0]) + # Certain things (e.g. 
run parameters like temp, pres) are keyed to the index only. + chead = [] + crows = [] + for index in self.Indices: + crow = [] + for head in ['temp', 'pres']: + if head not in self.Data: continue + if head not in chead: chead.append(head) + rlist = list(set([i for i in self.Data.ix[index][head][:] if not isnpnan(i)])) + if len(rlist) != 1: + logger.error('Heading %s should appear once for index %s (found %i)' % (head, index, len(rlist))) + raise RuntimeError + crow.append(rlist[0]) + crows.append(crow[:]) + + # Now create the mini data table. + self.Data2 = pd.DataFrame(crows, columns=chead, index=self.Indices) + # A list of Observable objects (i.e. column headings) which # contain methods for calculating observables that we need. # Think about: # (1) How much variability is allowed across Indices? # For instance, different S_cd is permissible. self.Observables = OrderedDict() - for obsname in [stand_head(i, '')[2] for i in self.observable_names]: + self.ObsNames = [] + for obsname in [stand_head(i, '')[2] for i in self.user_observable_names]: if obsname in self.Observables: logger.error('%s was already specified as an observable' % (obsname)) self.Observables[obsname] = OrderedDict() - for ie, index in enumerate(self.Indices): + for index in self.Indices: if obsname in self.Observable_Map: newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) @@ -558,24 +577,16 @@ def reffld_error(reason=''): else: logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) self.Observables[obsname][index] = Observable(name=obsname, source=self.Data.ix[index]) + self.ObsNames.append(obsname) print self.Indices print self.Observables print repr(self.Data) + # if 'temp' in self.Data: + # self.Temperatures = OrderedDict() + # for index in self.Indices: + # print self.Data['temp'][0] raw_input() - # for index in 
self.Indices: - # self.Observables[index] = [] - # for obsname in obsnames: - # for index, ie in enumerate(self.Indices): - # if obsname in self.Observable_Map: - # newobs = self.Observable_Map[obsname](source=self.Data.ix[index]) - # if newobs.name in [obs.name for obs in self.Observables[index]]: - # logger.error('%s is specified but a %s observable already exists' % (obsname, newobs.__class__.__name__)) - # logger.info('%s is specified as an observable, appending %s class\n' % (obsname, newobs.__class__.__name__)) - # self.Observables[index].append(newobs) - # else: - # logger.warn('%s is specified but there is no corresponding Observable class, appending empty one\n' % obsname) - # self.Observables[index].append(Observable(name=obsname, source=self.Data.ix[index])) return def find_ic(self, index, stype, icn): @@ -623,180 +634,8 @@ def find_ic(self, index, stype, icn): logger.info('Target %s Index %s Simulation %s : ' 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError return found, 0 if numbered else icn - def determine_simulations(self): - - """ - Determine which simulations need to be run. The same - simulations are run for each index in the data set. - - Note that there may be a different number of initial - conditions (i.e. parallel runs) for different indices. - """ - - # Determine which simulations are needed. 
- sreqs = OrderedDict() - for obsname in self.Observables: - sreqs[obsname] = self.Observables[obsname][self.Indices[0]].sreq - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - - self.Simulations = OrderedDict([(i, OrderedDict()) for i in self.Indices]) - tsnames = [] - for obsname in self.Observables: - treqs = self.Observables[obsname][self.Indices[0]].treq - for treq in treqs: - if treq not in tsnames: - tsnames.append(treq) - - for index in self.Indices: - # Loop over observable names. Here we determine whether - # the initial coordinates are missing (bad), unique (good) or ambiguous (bad). - if 'n_ic' in self.Data.ix[index]: - n_ic = self.Data.ix[index]['n_ic'] - else: - n_ic = 1 - for obsname in sreqs: - for stypes in sreqs[obsname]: - if isinstance(stypes, str): - stypes = [stypes] - for icn in range(n_ic): - icfiles = [] - svalid = [] - for stype in stypes: - fpath, iframe = self.find_ic(index, stype, icn) - if fpath != '': - icfiles.append(fpath) - svalid.append(stype) - if len(icfiles) == 0: - logger.error('Target %s Index %s Simulation %s : ' - 'could not find initial coordinate file\n' - % (self.name, index, stype)) - raise RuntimeError - elif len(icfiles) > 1: - logger.error('Target %s Index %s Simulation %s : ' - 'ambiguous initial coordinate files (%s)' - % (self.name, index, stype, ' '.join(icfiles))) - self.Simulations[index][svalid[0]] = Simulation(index, svalid[0], icfiles[0], iframe, tsnames) - - print self.Simulations - print tsnames - # raw_input() - # if isinstance(sreqs[obsname], str): - # stypes = [sreqs[obsname]] - # for stype in stypes: - # print index, stype - - # for stype in toplevel: - # for index in self.Indices: - # def find_ic(icn): - # found = '' - # # Initial condition files will be searched for in the following priority: - # # targets/target_name/index/stype/ICs/stype_#.xyz - # # targets/target_name/index/stype/ICs/stype#.xyz - # # targets/target_name/index/stype/ICs/#.xyz - # # 
targets/target_name/index/stype/ICs/stype.xyz - # # targets/target_name/index/stype/ICs/coords.xyz - # # targets/target_name/index/stype/stype.xyz - # # targets/target_name/index/stype/coords.xyz - # # targets/target_name/index/stype.xyz - # # targets/target_name/stype.xyz - # basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), - # (os.path.join(index, stype, 'ICs', stype), False), - # (os.path.join(index, stype, 'ICs', 'coords'), False), - # (os.path.join(index, stype, stype), False), - # (os.path.join(index, stype, 'coords'), False), - # (os.path.join(index, stype), False), - # (os.path.join(stype), False)] - # paths = [] - # for fnm, numbered in basefnms: - # for crdsfx in self.crdsfx: - # fpath = os.path.join(self.tgtdir, fnm+crdsfx) - # paths.append(fpath) - # if os.path.exists(fpath): - # if found != '': - # logger.info('Target %s Index %s Simulation %s : ' - # '%s overrides %s\n' % (self.name, index, stype, fpath)) - # else: - # if not numbered: - # M = Molecule(fpath) - # if len(M) <= icn: - # logger.error("Target %s Index %s Simulation %s : " - # "initial coordinate file %s doesn't have enough structures\n" % - # (self.name, index, stype, fpath)) - # raise RuntimeError - # logger.info('Target %s Index %s Simulation %s : ' - # 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) - # found = fpath - # if found == '': - # logger.error('Target %s Index %s Simulation %s : ' - # 'could not find initial coordinate file\n' - # 'Please provide one of the following:\n%s' - # % (self.name, index, stype, '\n'.join(paths))) - # raise RuntimeError - # return found - # if 'n_ic' in self.Data.ix[index]: - # n_ic = self.Data.ix[index]['n_ic'] - # else: - # n_ic = 1 - # for i in range(n_ic): - # fpath = find_ic(i) - - raw_input() - - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) 
== str] for i in sreqs])) - # print toplevel - # raw_input() - # return - - # def narrow(): - # # Get the names of simulations that are REQUIRED to calculate the observables. - # toplevel = list(itertools.chain(*[[j for j in sreqs[i] if type(j) == str] for i in sreqs])) - # # Whoa, this is a deeply nested loop. What does it do? - # # First loop over the elements in "sreqs" for each observable name. - # # If the element is a string, then it's a required simulation name (top level). - # # If the element is a list, then it's a list of valid simulation names - # # and we need to narrow the list down. - # # For the ones that are lists (and have any intersection with the top level), - # # delete the ones that don't intersect. - # sreq0 = copy.deepcopy(sreqs) - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # if len(sims) == 1: - # sreqs[obsname] = [sims[0]] - # elif any([i in sims for i in toplevel]): - # for j in sims: - # if j not in toplevel: sims.remove(j) - # return sreqs != sreq0 - - # print sreqs - # while narrow(): - # print sreqs - # For the leftover observables where there is still some ambiguity, - # we attempt - # To do: Figure this out from existing initial conditions maybe - # for obsname in sreqs: - # for sims in sreqs[obsname]: - # if type(sims) == list: - # for sim in sims: - # if has_ic(sim): - # sreqs[obsname] = [sim] - - - # self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - - return - def prepare_simulations(self): """ @@ -810,110 +649,51 @@ def prepare_simulations(self): # print narrow() # The list of simulations that we'll be running. + self.SimNames = [i.lower() for i in self.user_simulation_names] self.Simulations = OrderedDict([(i, []) for i in self.Indices]) - - return - - def launch_simulation(self, index, simname): - - """ - - Launch a simulation - either locally or via the Work Queue. 
- This function is intended to be run within the folder: - target_name/iteration_number/system_index/simulation_name/initial_condition OR - target_name/iteration_number/system_index/simulation_name - - """ - - wq = getWorkQueue() - if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): - link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) - self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] - self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) - cmdstr = '%s python npt.py %s %.3f %.3f' % (self.nptpfx, self.engname, temperature, pressure) - if wq == None: - logger.info("Running condensed phase simulation locally.\n") - logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - _exec(cmdstr, copy_stderr=True, outfnm='npt.out') - else: - queue_up(wq, command = cmdstr+' &> npt.out', - input_files = self.nptfiles + self.scripts + ['forcebalance.p'], - output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) - - # NAMES FOR OBJECTS! - - # Timeseries: Time series of an instantaneous observable that is - # returned by the MD simulation. - - # Observable: A thermodynamic property which can be compared to - # experiment and possesses methods for calculating the property - # and its derivatives. - - # State? Point? What should this be called?? - - # # print revhead[1:] - # # for rn, row in enumerate(drows): - # # print index[rn], row - - # # print repr(self.Data) - - # # # pd.DataFrame([OrderedDict([(head, row[i]) for i, head in revised_heading if row[i] != '']) for row in source.table]) - - - # # # pd.DataFrame(OrderedDict([(head,[row[i] for row in source.table]) for i, head in enumerate(revised_heading)])) - # # # print self.Data.__repr__ - # # # raw_input() - - # # return + # Dictionary of time series to extract from each simulation. 
+ SimTS = defaultdict(set) + # Check to see whether each observable can be unambiguously calculated from the specified simulations + for obsname in self.ObsNames: + sreq = self.Observables[obsname][self.Indices[0]].sreq + ssels = [] + SimTS_ = defaultdict(set) + for sdct in sreq: + # This is a dictionary of simulation names : + if len(set(self.SimNames).intersection(sdct.keys())) > 1: + logger.error("Ambiguous: Don't know which simulation to use in calculating observable %s" % obsname) + logger.error("Choose ONE from this menu of required simulations: [%s]" % ' '.join(sdct.keys())) + raise RuntimeError + if len(set(self.SimNames).intersection(sdct.keys())) == 0: + logger.error("Missing Simulation: Cannot calculate observable %s" % obsname) + logger.error("Choose ONE from this menu of required simulations: [%s]" % ' '.join(sdct.keys())) + raise RuntimeError + # One of the simulations that will be used in calculating this observable. + ssel = list(set(self.SimNames).intersection(sdct.keys()))[0] + SimTS[ssel].update(set(sdct[ssel])) + SimTS_[ssel].update(set(sdct[ssel])) + printcool_dictionary({i:' '.join(sorted(list(SimTS_[i]))) for i in sorted(SimTS_.keys())}, title="Observable %s uses these simulations : timeseries" % obsname) + printcool_dictionary({i:' '.join(sorted(list(SimTS[i]))) for i in sorted(SimTS.keys())}, title="Needed Simulations : Extracted Timeseries") + unused = sorted(list(set(self.SimNames).difference(set(SimTS.keys())))) + if len(unused) > 0: + logger.error("Simulation %s is specified but it's never used to calculate any observables" % ', '.join(unused)) + raise RuntimeError - # fp = open(expdata) - - # line = fp.readline() - # foundHeader = False - # names = None - # units = None - # label_header = None - # label_unit = None - # count = 0 - # metadata = {} - # while line: - # # Skip comments and blank lines - # if line.lstrip().startswith("#") or not line.strip(): - # line = fp.readline() - # continue - # # Metadata is denoted using - # if "=" 
in line: # Read variable - # param, value = line.split("=") - # param = param.strip().lower() - # metadata[param] = value - # # if param == "denoms": - # # for e, v in enumerate(value.split()): - # # self.denoms[self.observables[e]] = float(v) - # # elif param == "weights": - # # for e, v in enumerate(value.split()): - # # self.weights[self.observables[e]] = float(v) - # elif foundHeader: # Read exp data - # count += 1 - # vals = line.split() - # label = (vals[0], label_header, label_unit) - # refs = np.array(vals[1:-2:2]).astype(float) - # wts = np.array(vals[2:-2:2]).astype(float) - # temperature = float(vals[-2]) - # pressure = None if vals[-1].lower() == "none" else \ - # float(vals[-1]) - # dp = Point(count, label=label, refs=refs, weights=wts, - # names=names, units=units, - # temperature=temperature, pressure=pressure) - # self.points.append(dp) - # else: # Read headers - # foundHeader = True - # headers = zip(*[tuple(h.split("_")) for h in line.split() - # if h != "w"]) - # label_header = list(headers[0])[0] - # label_unit = list(headers[1])[0] - # names = list(headers[0][1:-2]) - # units = list(headers[1][1:-2]) - # line = fp.readline() + for index in self.Indices: + for stype, tsset in SimTS.items(): + if 'n_ic' in self.Data.ix[index]: + n_ic = self.Data.ix[index]['n_ic'] + if n_ic < 1: + logger.error("n_ic must >= 1") + raise RuntimeError + else: + n_ic = 1 + for icn in range(n_ic): + icfnm, icframe = self.find_ic(index, stype, icn) + sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype + self.Simulations[index].append(Simulation(sname, index, stype, icfnm, icframe, sorted(list(tsset)))) + + return def retrieve(self, dp): """Retrieve the molecular dynamics (MD) results and store the calculated @@ -950,7 +730,7 @@ def retrieve(self, dp): def submit_jobs(self, mvals, AGrad=True, AHess=True): """This routine is called by Objective.stage() and will run before "get". 
- It submits the jobs and the stage() function will wait for jobs + It submits the jobs (or runs them locally) and the stage() function will wait for jobs to complete. Parameters @@ -967,6 +747,49 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): Nothing. """ + + printcool("Submitting jobs") + cwd = os.getcwd() + wq = getWorkQueue() + for index in self.Indices: + # if 'temp' in self.Data: + # tset = set([iself.Data['temp'].ix[index][:]) + temp = self.Data2['temp'].ix[index] if 'temp' in self.Data2 else None + pres = self.Data2['pres'].ix[index] if 'pres' in self.Data2 else None + for Sim in self.Simulations[index]: + simd = os.path.join(os.getcwd(), index, Sim.name) + GoInto(simd) + # Submit or run the simulation if the result file does not exist. + if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): + # Write to disk: Force field object, current parameter values, target options + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,self.OptionDict),f) + M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] + M.write("%s%s" % (Sim.stype, self.crdsfx[0])) + # # Get relevant files from the target folder, I suppose. + # link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) + # # Determine initial coordinates. + # self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] + # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) + # Command for running the simulation. 
+ cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.stype)] + if temp != None: + cmdlist.append('-T %f' % float(temp)) + if pres != None: + cmdlist.append('-P %f' % float(pres)) + cmdstr = ' '.join(cmdlist) + print cmdstr + # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) + # if wq == None: + # logger.info("Running condensed phase simulation locally.\n") + # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) + # _exec(cmdstr, copy_stderr=True, outfnm='npt.out') + # else: + # queue_up(wq, command = cmdstr+' &> npt.out', + # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], + # output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) + os.chdir(cwd) + return + # Set up and run the simulation chain on all points. for pt in self.points: # Create subdir @@ -1254,31 +1077,43 @@ class Simulation(object): type, initial condition). """ - def __init__(self, index, stype, initial, iframe, tsnames): + def __init__(self, name, index, stype, initial, iframe, tsnames): + # The simulation name will identify the simulation within a collection + # belonging to the Index. + self.name = name + # The Index that the simulation belongs to. self.index = index + # The type of simulation (liquid, gas, solid, bilayer...) self.stype = stype + # The file containing initial coordinates. self.initial = initial + # The frame number in the initial coordinate file. self.iframe = iframe + # The time series for the simulation. 
self.timeseries = OrderedDict([(i, []) for i in tsnames]) def __str__(self): msg = [] - if self.temperature is None: - msg.append("State: Unknown.") - elif self.pressure is None: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K.") - else: - msg.append("State: Point " + str(self.idnr) + " at " + - str(self.temperature) + " K and " + - str(self.pressure) + " bar.") - - msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) - for key in self.ref: - msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.stype)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) + # if self.temperature is None: + # msg.append("State: Unknown.") + # elif self.pressure is None: + # msg.append("State: Point " + str(self.idnr) + " at " + + # str(self.temperature) + " K.") + # else: + # msg.append("State: Point " + str(self.idnr) + " at " + + # str(self.temperature) + " K and " + + # str(self.pressure) + " bar.") + + # msg.append("Point " + str(self.idnr) + " reference data " + "-"*30) + # for key in self.ref: + # msg.append(" " + key.strip() + " = " + str(self.ref[key]).strip()) - msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) - for key in self.data: - msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) + # msg.append("Point " + str(self.idnr) + " calculated data " + "-"*30) + # for key in self.data: + # msg.append(" " + key.strip() + " = " + str(self.data[key]).strip()) - return "\n".join(msg) + # return "\n".join(msg) diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 275265680..6cd1bec2c 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -68,7 +68,17 @@ type Thermo_GMX weight 1.0 source expset.txt quantities density 
h_vap -n_sim_chain 2 +simulations liquid gas +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_TAB +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +observables al scd kappa md_steps 100000 eq_steps 50000 $end diff --git a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt index c67aece18..2001d85b1 100644 --- a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt +++ b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt @@ -1,6 +1,6 @@ Index T P Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic -50C 323.15 1 0.631 1 C15 C34 1 58 1 10 - C17 0.198144 C36 0.198144 +50C 1 0.631 1 C15 C34 1 58 1 10 + 323.15 C17 0.198144 C36 0.198144 C18 0.198128 C37 0.198128 C19 0.198111 C38 0.198111 C20 0.198095 C39 0.198095 From df46c9a5e02e717f0d6158a3d9df4d53cdd1dbdc Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Sun, 20 Apr 2014 16:48:11 -0700 Subject: [PATCH 13/25] A few changes for energy/force matching and Q-Chem output parsing (cherry pick over to main). 
--- src/abinitio.py | 36 ++++++++++++++++++++++++++++++------ src/molecule.py | 5 +++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/abinitio.py b/src/abinitio.py index b6844cd51..7d288e18c 100644 --- a/src/abinitio.py +++ b/src/abinitio.py @@ -446,28 +446,34 @@ def read_reference_data(self): self.ntq = 0 def indicate(self): - Headings = ["Observable", "Difference\n(Calc-Ref)", "Denominator\n RMS (Ref)", " Percent \nDifference", "Weight"] + Headings = ["Observable", "Difference\n(Calc-Ref)", "Denominator\n RMS (Ref)", " Percent \nDifference", "Weight", "Contribution"] Data = OrderedDict([]) if self.energy: Data['Energy (kJ/mol)'] = ["%8.4f" % self.e_err, "%8.4f" % self.e_ref, "%.4f%%" % (self.e_err_pct*100), - "%.3f" % self.w_energy] + "%.3f" % self.w_energy, + "%8.4f" % self.e_ctr] if self.force: Data['Gradient (kJ/mol/A)'] = ["%8.4f" % (self.f_err/10), "%8.4f" % (self.f_ref/10), "%.4f%%" % (self.f_err_pct*100), - "%.3f" % self.w_force] + "%.3f" % self.w_force, + "%8.4f" % self.f_ctr] if self.use_nft: Data['Net Force (kJ/mol/A)'] = ["%8.4f" % (self.nf_err/10), "%8.4f" % (self.nf_ref/10), "%.4f%%" % (self.nf_err_pct*100), - "%.3f" % self.w_netforce] + "%.3f" % self.w_netforce, + "%8.4f" % self.nf_ctr] Data['Torque (kJ/mol/rad)'] = ["%8.4f" % self.tq_err, "%8.4f" % self.tq_ref, "%.4f%%" % (self.tq_err_pct*100), - "%.3f" % self.w_torque] + "%.3f" % self.w_torque, + "%8.4f" % self.tq_ctr] self.printcool_table(data=Data, headings=Headings, color=0) + if self.force: + logger.info("Maximum force error on atom %i (%s), frame %i, %8.4f kJ/mol/A\n" % (self.maxfatom, self.mol.elem[self.maxfatom], self.maxfshot, self.maxdf/10)) def energy_all(self): if hasattr(self, 'engine'): @@ -710,6 +716,10 @@ def callM(mvals_): return self.energy_force_transform() for p in self.pgrad: dM_all[:,p,:], ddM_all[:,p,:] = f12d3p(fdwrap(callM, mvals, p), h = self.h, f0 = M_all) + if self.force and not in_fd(): + self.maxfatom = -1 + self.maxfshot = -1 + self.maxdf = 0.0 
for i in range(NS): if i % 100 == 0: logger.debug("\rIncrementing quantities for snapshot %i\r" % i) @@ -740,7 +750,12 @@ def callM(mvals_): # Increment the average values. a = 1 if self.force: - dfrcarray = np.mean(np.array([np.linalg.norm(M[a+3*j:a+3*j+3] - Q[a+3*j:a+3*j+3]) for j in range(nat)])) + dfrcarray_ = np.array([np.linalg.norm(M[a+3*j:a+3*j+3] - Q[a+3*j:a+3*j+3]) for j in range(nat)]) + if not in_fd() and np.max(dfrcarray_) > self.maxdf: + self.maxdf = np.max(dfrcarray_) + self.maxfatom = np.argmax(dfrcarray_) + self.maxfshot = i + dfrcarray = np.mean(dfrcarray_) qfrcarray = np.mean(np.array([np.linalg.norm(Q[a+3*j:a+3*j+3]) for j in range(nat)])) dF_M += P*dfrcarray dF_Q += R*dfrcarray @@ -1035,17 +1050,26 @@ def callM(mvals_): dTfrac = MBP * dT_M / qT_M + QBP * dT_Q / qT_Q # Save values to qualitative indicator if not inside finite difference code. if not in_fd(): + # Contribution from energy and force parts. + self.e_ctr = (MBP * weighted_variance(np.array([SPiXi[0]]),np.array([WCiW[0]]),Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) + + QBP * weighted_variance(np.array([SRiXi[0]]),np.array([WCiW[0]]),Y,X0_Q,X0_Q,NCP1,subtract_mean = not self.absolute)) self.e_ref = MBP * np.sqrt(QQ_M[0]/Z - Q0_M[0]**2/Z/Z) + QBP * np.sqrt((QQ_Q[0]/Y - Q0_Q[0]**2/Y/Y)) self.e_err = dE self.e_err_pct = dEfrac if self.force: + self.f_ctr = (MBP * weighted_variance(SPiXi[1:1+3*nat],WCiW[1:1+3*nat],Z,X0_M,X0_M,NCP1,subtract_mean = False) + + QBP * weighted_variance(SRiXi[1:1+3*nat],WCiW[1:1+3*nat],Y,X0_Q,X0_Q,NCP1,subtract_mean = False)) self.f_ref = qF self.f_err = dF self.f_err_pct = dFfrac if self.use_nft: + self.nf_ctr = (MBP * weighted_variance(SPiXi[1+3*nat:1+3*nat+3*nnf],WCiW[1+3*nat:1+3*nat+3*nnf],Z,X0_M,X0_M,NCP1,subtract_mean = False) + + QBP * weighted_variance(SRiXi[1+3*nat:1+3*nat+3*nnf],WCiW[1+3*nat:1+3*nat+3*nnf],Y,X0_Q,X0_Q,NCP1,subtract_mean = False)) self.nf_ref = qN self.nf_err = dN self.nf_err_pct = dNfrac + self.tq_ctr = (MBP * 
weighted_variance(SPiXi[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],WCiW[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],Z,X0_M,X0_M,NCP1,subtract_mean = False) + + QBP * weighted_variance(SRiXi[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],WCiW[1+3*nat+3*nnf:1+3*nat+3*nnf+3*ntq],Y,X0_Q,X0_Q,NCP1,subtract_mean = False)) self.tq_ref = qT self.tq_err = dT self.tq_err_pct = dTfrac diff --git a/src/molecule.py b/src/molecule.py index 2f3ccbd34..2e17bcc05 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -2454,6 +2454,11 @@ def read_qcout(self, fnm, errok = [], **kwargs): sline = line.split() mkchgThis.append(float(sline[2])) mkspnThis.append(float(sline[3])) + elif re.match("^[0-9]+ +[A-Z][a-z]?( +[-+]?([0-9]*\.)?[0-9]+){1}$", line): + MMode = 2 + sline = line.split() + mkchgThis.append(float(sline[2])) + mkspnThis.append(0.0) elif MMode == 2: # Break out of the loop if we encounter anything other than Mulliken charges mkchg.append(mkchgThis[:]) mkspn.append(mkspnThis[:]) From adc1ff4b0f305919bd7a24b2e6eec7f47533c102 Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Mon, 21 Apr 2014 09:34:47 -0700 Subject: [PATCH 14/25] Improvements for energy/force and frequency matching --- src/abinitio.py | 24 +++++++++++++++++++++--- src/molecule.py | 2 ++ src/parser.py | 2 +- src/vibration.py | 18 +++++++++++++----- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/abinitio.py b/src/abinitio.py index 7d288e18c..79ff246ba 100644 --- a/src/abinitio.py +++ b/src/abinitio.py @@ -6,7 +6,7 @@ import os import shutil -from forcebalance.nifty import col, eqcgmx, flat, floatornan, fqcgmx, invert_svd, kb, printcool, bohrang, warn_press_key, warn_once +from forcebalance.nifty import col, eqcgmx, flat, floatornan, fqcgmx, invert_svd, kb, printcool, bohrang, warn_press_key, warn_once, pvec1d import numpy as np from forcebalance.target import Target from forcebalance.molecule import Molecule, format_xyz_coord @@ -445,6 +445,10 @@ def read_reference_data(self): self.nnf = 0 self.ntq = 0 + # Normalize Boltzmann 
weights. + self.boltz_wts /= sum(self.boltz_wts) + self.qmboltz_wts /= sum(self.qmboltz_wts) + def indicate(self): Headings = ["Observable", "Difference\n(Calc-Ref)", "Denominator\n RMS (Ref)", " Percent \nDifference", "Weight", "Contribution"] Data = OrderedDict([]) @@ -688,6 +692,8 @@ def get_energy_force(self, mvals, AGrad=False, AHess=False): # Objective functions SPiXi = np.zeros(NCP1) SRiXi = np.zeros(NCP1) + # Debug: Store all objective function contributions + XiAll = np.zeros((NS, NCP1)) if AGrad: SPiXi_p = np.zeros((NP,NCP1)) SRiXi_p = np.zeros((NP,NCP1)) @@ -792,6 +798,7 @@ def callM(mvals_): Xi = np.outer(M,M) - 2*np.outer(Q,M) + np.outer(Q,Q) else: Xi = X**2 + XiAll[i] = Xi.copy() SPiXi += P * Xi SRiXi += R * Xi #==============================================================# @@ -972,8 +979,11 @@ def callM(mvals_): MBP = 1 - self.qmboltz C = MBP*(QQ_M-Q0_M*Q0_M/Z)/Z + QBP*(QQ_Q-Q0_Q*Q0_Q/Y)/Y # Normalize the force components - for i in range(1, len(C), 3): - C[i:i+3] = np.mean(C[i:i+3]) + # Normalize by atom? + # for i in range(1, len(C), 3): + # C[i:i+3] = np.mean(C[i:i+3]) + # Or normalize all forces? + C[1:len(C)] = np.mean(C[1:len(C)]) Ci = 1. / C WCiW = WM * Ci * WM #==============================================================# @@ -987,6 +997,14 @@ def callM(mvals_): else: X2_M = weighted_variance(SPiXi,WCiW,Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) X2_Q = weighted_variance(SRiXi,WCiW,Y,X0_Q,X0_Q,NCP1,subtract_mean = not self.absolute) + # Print out all energy / force contributions, useful for debugging. 
+ # for i in range(XiAll.shape[0]): + # efctr = weighted_variance(XiAll[i],WCiW,Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) + # WCiW1 = WCiW.copy() + # for j in range(1, len(WCiW1)): + # WCiW1[j] = 0.0 + # ectr = weighted_variance(XiAll[i],WCiW1,Z,X0_M,X0_M,NCP1,subtract_mean = not self.absolute) + # print i, "ectr = %.3f efctr = %.3f" % (ectr, efctr) for p in self.pgrad: if not AGrad: continue X2_M_p[p] = weighted_variance(SPiXi_p[p],WCiW,Z,2*X0_M,M0_M_p[p],NCP1,subtract_mean = not self.absolute) diff --git a/src/molecule.py b/src/molecule.py index 2e17bcc05..f423ec254 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -1036,6 +1036,8 @@ def write(self,fnm=None,ftype=None,append=False,select=None,**kwargs): elif ftype == None: ftype = os.path.splitext(fnm)[1][1:] ## Fill in comments. + if 'comms' not in self.Data: + self.comms = ['Generated by ForceBalance from %s: Frame %i of %i' % (fnm, i+1, self.ns) for i in range(self.ns)] if 'xyzs' in self.Data and len(self.comms) < len(self.xyzs): for i in range(len(self.comms), len(self.xyzs)): self.comms.append("Frame %i: generated by ForceBalance" % i) diff --git a/src/parser.py b/src/parser.py index b21a39214..27e731bc0 100644 --- a/src/parser.py +++ b/src/parser.py @@ -194,7 +194,7 @@ "absolute" : (0, -150, 'When matching energies in AbInitio, do not subtract the mean energy gap.', 'Energy matching (advanced usage)', 'abinitio'), "cauchy" : (0, 0, 'Normalize interaction energies each using 1/(denom**2 + reference**2) which resembles a Cauchy distribution', 'Interaction energy targets', 'interaction'), "attenuate" : (0, 0, 'Normalize interaction energies using 1/(denom**2 + reference**2) only for repulsive interactions greater than denom.', 'Interaction energy targets', 'interaction'), - "normalize" : (0, -150, 'Divide interaction energy objective by the number of snapshots', 'Interaction energy targets', 'interaction'), + "normalize" : (0, -150, 'Divide objective function by the number of snapshots / 
vibrations', 'Interaction energy / vibrational mode targets', 'interaction, vibration'), "manual" : (0, -150, 'Give the user a chance to fill in condensed phase stuff on the zeroth step', 'Condensed phase property targets (advanced usage)', 'liquid'), "hvap_subaverage" : (0, -150, 'Don\'t target the average enthalpy of vaporization and allow it to freely float (experimental)', 'Condensed phase property targets (advanced usage)', 'liquid'), "force_cuda" : (0, -150, 'Force the external npt.py script to crash if CUDA Platform not available', 'Condensed phase property targets (advanced usage)', 'liquid_openmm'), diff --git a/src/vibration.py b/src/vibration.py index 3b8127d40..0a1e98928 100644 --- a/src/vibration.py +++ b/src/vibration.py @@ -45,6 +45,7 @@ def __init__(self,options,tgt_opts,forcefield): #======================================# self.set_option(tgt_opts,'wavenumber_tol','denom') self.set_option(tgt_opts,'reassign_modes','reassign') + self.set_option(tgt_opts,'normalize') #======================================# # Variables which are set here # @@ -105,7 +106,7 @@ def read_reference_data(self): def indicate(self): """ Print qualitative indicator. 
""" - # if self.reassign == 'overlap' : count_assignment(self.c2r) + if self.reassign == 'overlap' : count_assignment(self.c2r) banner = "Frequencies (wavenumbers)" headings = ["Mode #", "Reference", "Calculated", "Difference", "Ref(dot)Calc"] data = OrderedDict([(i, [self.ref_eigvals[i], self.calc_eigvals[i], self.calc_eigvals[i] - self.ref_eigvals[i], "%.4f" % self.overlaps[i]]) for i in range(len(self.ref_eigvals))]) @@ -154,14 +155,21 @@ def get_eigvals(mvals_): self.FF.make(mvals_) eigvals, eigvecs = self.vibration_driver() eigvecs_nrm, eigvecs_nrm_mw = self.process_vectors(eigvecs) + # The overlap metric may take into account some frequency differences + dev = np.array([[(np.abs(i-j)/1000)/(1.0+np.abs(i-j)/1000) for j in self.ref_eigvals] for i in eigvals]) + for i in range(dev.shape[0]): + dev[i, :] /= max(dev[i, :]) + if self.reassign in ['permute', 'overlap']: - a = np.array([[int(1e6*(1.0-np.dot(v1.flatten(),v2.flatten())**2)) for v2 in self.ref_eigvecs_nrm] for v1 in eigvecs_nrm_mw]) # In the matrix that we constructed, these are the column numbers (reference mode numbers) # that are mapped to the row numbers (calculated mode numbers) if self.reassign == 'permute': + a = np.array([[int(1e6*(1.0-np.dot(v1.flatten(),v2.flatten())**2)) for v2 in self.ref_eigvecs_nrm] for v1 in eigvecs_nrm_mw]) c2r = Assign(a) eigvals = eigvals[c2r] elif self.reassign == 'overlap': + a = np.array([[(1.0-np.dot(v1.flatten(),v2.flatten())**2) for v2 in self.ref_eigvecs_nrm] for v1 in eigvecs_nrm_mw]) + a += dev c2r = np.argmin(a, axis=0) eigvals_p = [] for j in c2r: @@ -185,11 +193,11 @@ def get_eigvals(mvals_): if AGrad or AHess: for p in self.pgrad: dV[p,:], _ = f12d3p(fdwrap(get_eigvals, mvals, p), h = self.h, f0 = calc_eigvals) - Answer['X'] = np.dot(D,D) / self.denom**2 + Answer['X'] = np.dot(D,D) / self.denom**2 / (len(D) if self.normalize else 1) for p in self.pgrad: - Answer['G'][p] = 2*np.dot(D, dV[p,:]) / self.denom**2 + Answer['G'][p] = 2*np.dot(D, dV[p,:]) / 
self.denom**2 / (len(D) if self.normalize else 1) for q in self.pgrad: - Answer['H'][p,q] = 2*np.dot(dV[p,:], dV[q,:]) / self.denom**2 + Answer['H'][p,q] = 2*np.dot(dV[p,:], dV[q,:]) / self.denom**2 / (len(D) if self.normalize else 1) if not in_fd(): self.calc_eigvals = calc_eigvals self.objective = Answer['X'] From 7726de98fec7e5ab8bbe7ee69730bfd445bf7542 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 21 Apr 2014 17:16:03 -0700 Subject: [PATCH 15/25] Work in progress --- src/data/md_one.py | 268 +++++++++++++++++++++++++++++++++++++++++++++ src/nifty.py | 26 ++--- src/thermo.py | 93 ++++++++++------ 3 files changed, 340 insertions(+), 47 deletions(-) create mode 100644 src/data/md_one.py diff --git a/src/data/md_one.py b/src/data/md_one.py new file mode 100644 index 000000000..b52a87d9b --- /dev/null +++ b/src/data/md_one.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python + +""" +md_one +======== + +This script is a part of ForceBalance and runs a single simulation +that may be combined with others to calculate general thermodynamic +properties. + +This script is meant to be launched automatically by ForceBalance. + +""" + +#==================# +#| Global Imports |# +#==================# + +import os +import argparse +import numpy as np +import importlib as il + +from forcebalance.nifty import lp_dump, lp_load, wopen +from forcebalance.nifty import printcool, printcool_dictionary +from forcebalance.molecule import Molecule + +from collections import OrderedDict + +from forcebalance.output import getLogger +logger = getLogger(__name__) + +#========================================================# +#| Global, user-tunable variables (simulation settings) |# +#========================================================# + +# Note: Only the simulation settings that vary across different +# simulations in a target may be specified on the command line. 
+ +parser = argparse.ArgumentParser() +parser.add_argument('simulation', type=str, + help='The simulation name (important; used in setting up)') +parser.add_argument('-T', type=float, default=None, + help='Simulation temperature, leave blank for constant energy') +parser.add_argument('-P', type=float, default=None, + help='Simulation pressure, leave blank for constant volume') +parser.add_argument('-g', action='store_true', + help='Calculate gradients for output time series') + +# These settings may be specified for debugging purposes (i.e. they +# will override what we read from forcebalance.p) +parser.add_argument('--nequil', type=int, default=0, + help='Number of steps for equilibration run (leave blank to use default from forcebalance.p)') +parser.add_argument('--nsteps', type=int, default=0, + help='Number of steps for production run (leave blank to use default from forcebalance.p)') +parser.add_argument('--timestep', type=float, default=0.0, + help='Time step in femtoseconds (leave blank to use default from forcebalance.p)') +parser.add_argument('--interval', type=float, default=0.0, + help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') +parser.add_argument('--outputs', type=list, nargs='+', + help='Specify the time series which are written to disk') + +args = parser.parse_args() + +def main(): + + """Usage: + + (prefix.sh) md_one.py + -T + -P + -g (if gradients of output timeseries are desired) + [Debugging Options Below] + --nequil + --nsteps + --outputs + + This program is meant to be called automatically by ForceBalance + because most options are loaded from the 'forcebalance.p' input + file. 
+ + """ + + printcool("ForceBalance simulation using engine: %s" % engname.upper(), + color=4, bold=True) + #---- + # Load the ForceBalance pickle file which contains: + #---- + # - Force field object + # - Optimization parameters + # - Options from the Target object that launched this simulation + FF, mvals, TgtOptions = lp_load(open('forcebalance.p')) + FF.ffdir = '.' + # Write the force field file. + FF.make(mvals) + # Switch for calculating gradients of output time series. + AGrad = args.g + + #---- + # Load the options that are set in the ForceBalance input file. + #---- + # Finite difference step size + h = TgtOptions['h'] + # Active parameters for gradient (if we filtered out the + # parameters that are known to have no effect) + pgrad = TgtOptions['pgrad'] + # MD options; time step (fs), production steps, equilibration steps, interval for saving data (ps) + timestep = args.timestep if args.timestep > 0 else SimOptions['timestep'] + nsteps = args.nsteps if args.nsteps > 0 else SimOptions['nsteps'] + nequil = args.nequil if args.nequil > 0 else SimOptions['nequil'] + intvl = args.intvl if args.intvl > 0 else SimOptions['interval'] + fnm = SimOptions['coords'] + if not fnm.startswith(args.simulation): + logger.error("Problem with SimOptions['coords'] (%s):\n" % fnm) + logger.error("Coordinate file must be consistent with simulation type (%s)\n" % args.simulation) + + # Number of threads, multiple timestep integrator, anisotropic box etc. 
+ threads = SimOptions.get('md_threads', 1) + mts = SimOptions.get('mts_integrator', 0) + rpmd_beads = SimOptions.get('rpmd_beads', 0) + force_cuda = SimOptions.get('force_cuda', 0) + nbarostat = SimOptions.get('n_mcbarostat', 25) + anisotropic = SimOptions.get('anisotropic_box', 0) + minimize = SimOptions.get('minimize_energy', 1) + + + + #---- + # Setting up MD simulations + #---- + EngOpts = OrderedDict() + EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) + EngOpts["gas"] = OrderedDict([("coords", gas_fnm), ("mol", MG), ("pbc", False)]) + GenOpts = OrderedDict([('FF', FF)]) + if engname == "openmm": + # OpenMM-specific options + EngOpts["liquid"]["platname"] = 'CUDA' + EngOpts["gas"]["platname"] = 'Reference' + if force_cuda: + try: Platform.getPlatformByName('CUDA') + except: raise RuntimeError('Forcing failure because CUDA platform unavailable') + if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") + elif engname == "gromacs": + # Gromacs-specific options + GenOpts["gmxpath"] = TgtOptions["gmxpath"] + GenOpts["gmxsuffix"] = TgtOptions["gmxsuffix"] + EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" + EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" + EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" + EngOpts["gas"]["gmx_mdp"] = os.path.splitext(gas_fnm)[0] + ".mdp" + if force_cuda: logger.warn("force_cuda option has no effect on Gromacs engine.") + if rpmd_beads > 0: raise RuntimeError("Gromacs cannot handle RPMD.") + if mts: logger.warn("Gromacs not configured for multiple timestep integrator.") + if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") + elif engname == "tinker": + # Tinker-specific options + GenOpts["tinkerpath"] = TgtOptions["tinkerpath"] + EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" + EngOpts["gas"]["tinker_key"] = 
os.path.splitext(gas_fnm)[0] + ".key" + if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") + if rpmd_beads > 0: raise RuntimeError("TINKER cannot handle RPMD.") + if mts: logger.warn("Tinker not configured for multiple timestep integrator.") + EngOpts["liquid"].update(GenOpts) + EngOpts["gas"].update(GenOpts) + for i in EngOpts: + printcool_dictionary(EngOpts[i], "Engine options for %s" % i) + + # Set up MD options + MDOpts = OrderedDict() + MDOpts["liquid"] = OrderedDict([("nsteps", liquid_nsteps), ("timestep", liquid_timestep), + ("temperature", temperature), ("pressure", pressure), + ("nequil", liquid_nequil), ("minimize", minimize), + ("nsave", int(1000 * liquid_intvl / liquid_timestep)), + ("verbose", True), ('save_traj', TgtOptions['save_traj']), + ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), + ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) + MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), + ("temperature", temperature), ("nsave", int(1000 * gas_intvl / gas_timestep)), + ("nequil", gas_nequil), ("minimize", minimize), ("threads", 1), ("mts", mts), + ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) + + + engines = [] + ## Setup and carry out simulations in chain + for i in range(args.length): + # Simulation files + if engname == "gromacs": + ndx_flag = False + coords = args.name + str(i+1) + ".gro" + top_file = args.name + str(i+1) + ".top" + mdp_file = args.name + str(i+1) + ".mdp" + ndx_file = args.name + str(i+1) + ".ndx" + if os.path.exists(ndx_file): + ndx_flag = True + + mol = Molecule(coords) + #---- + # Set coordinates and molecule for engine + #---- + EngOpts = OrderedDict([("FF", FF), + ("pbc", True), + ("coords", coords), + ("mol", mol)]) + + if engname == "gromacs": + # Gromacs-specific options + EngOpts["gmx_top"] = top_file + EngOpts["gmx_mdp"] = mdp_file + if ndx_flag: + EngOpts["gmx_ndx"] = ndx_file + + 
printcool_dictionary(EngOpts) + + # Create engine objects and store them for subsequent analysis. + s = Engine(name=args.name+str(i+1), **EngOpts) + + #=====================# + # Run the simulation. # + #=====================# + MDOpts = OrderedDict([("nsteps", args.nsteps), + ("nequil", args.nequil)]) + + printcool("Molecular dynamics simulation", color=4, bold=True) + s.md(verbose=True, **MDOpts) + + engines.append(s) + + #======================================================================# + # Extract the quantities of interest from the MD simulations and dump # + # the results to file. # + # =====================================================================# + results = OrderedDict() + for q in args.quantities: + logger.info("Extracting %s...\n" % q) + + # Initialize quantity + objstr = "Quantity_" + q.capitalize() + dm = il.import_module('..quantity', + package='forcebalance.quantity') + + Quantity = getattr(dm, objstr)(engname, args.temperature, args.pressure) + + Q, Qerr, Qgrad = Quantity.extract(engines, FF, mvals, h, pgrad, AGrad) + + results.setdefault("values", []).append(Q) + results.setdefault("errors", []).append(Qerr) + results.setdefault("grads", []).append(Qgrad) + + logger.info("Finished!\n") + + # Print out results for the quantity and its derivative. 
+ Sep = printcool(("%s: % .4f +- % .4f \nAnalytic Derivative:" + % (q.capitalize(), Q, Qerr))) + FF.print_map(vals=Qgrad) + + # Dump results to file + logger.info("Writing final force field.\n") + pvals = FF.make(mvals) + + logger.info("Writing all simulation data to disk.\n") + with wopen('md_result.p') as f: + lp_dump((np.asarray(results["values"]), + np.asarray(results["errors"]), + np.asarray(results["grads"])), f) + +if __name__ == "__main__": + main() + diff --git a/src/nifty.py b/src/nifty.py index e0ff154a5..d51999d1e 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -237,11 +237,17 @@ def magic_string(str): #===============================# #| Math: Variable manipulation |# #===============================# -def isnan(var): - """ Attempt to see if the given variable is np.nan. """ - if isinstance(var, float): +def isnpnan(var): + """ + + Determine whether a variable is np.nan. I wrote this function + because np.isnan would crash if we use it on a dtype that is not + np.float + + """ + if any([isinstance(var, x) for x in [float, np.float, np.float32, np.float64, np.double]]): return np.isnan(var) - return False + else: return False def isint(word): """ONLY matches integers! If you have a decimal point? None shall pass! @@ -285,18 +291,6 @@ def floatornan(word): logger.info("Setting %s to % .1e\n" % big) return big -def isnpnan(var): - """ - - Determine whether a variable is np.nan. I wrote this function - because np.isnan would crash if we use it on a dtype that is not - np.float - - """ - if type(var) in [np.float, np.float32, np.float64, np.double]: - return np.isnan(var) - return False - def col(vec): """ Given any list, array, or matrix, return a 1-column matrix. 
diff --git a/src/thermo.py b/src/thermo.py index 7e80fdfab..71bb681ea 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -532,7 +532,7 @@ def reffld_error(reason=''): def intcol(col): if col in self.Data.columns: for idx in self.Data.index: - if not isnan(self.Data[col][idx]): + if not isnpnan(self.Data[col][idx]): self.Data[col][idx] = int(self.Data[col][idx]) def floatcol(col): @@ -571,36 +571,61 @@ def floatcol(col): return - def find_ic(self, index, stype, icn): + def find_file(self, index, stype, sufs, icn): """ - Search for a suitable initial condition file. + Search for a suitable file that matches the simulation index, + type, suffix and IC number. This can be used to search for + initial coordinates, but also auxiliary files for the + simulation (e.g. .top and .mdp files for a Gromacs simulation, + or .key files for a Tinker simulation.) + + Generally, it is preferred to provide files where the base + name matches the simulation type. However, since it is also + okay to put all files for a simulation type into a + subdirectory, generic file names like 'topol' and 'conf' may + be used. 
- Initial condition files will be searched for in the following priority: - targets/target_name/index/stype/ICs/stype_#.xyz - targets/target_name/index/stype/ICs/stype#.xyz - targets/target_name/index/stype/ICs/#.xyz - targets/target_name/index/stype/ICs/stype.xyz - targets/target_name/index/stype/ICs/coords.xyz - targets/target_name/index/stype/stype.xyz - targets/target_name/index/stype/coords.xyz - targets/target_name/index/stype.xyz - targets/target_name/stype.xyz + Initial condition files will be searched for in the following priority (suf stands for suffix) + targets/target_name/index/stype/ICs/stype_#.suf + targets/target_name/index/stype/ICs/stype#.suf + targets/target_name/index/stype/ICs/#.suf + targets/target_name/index/stype/ICs/stype.suf + targets/target_name/index/stype/ICs/coords.suf + targets/target_name/index/stype/ICs/conf.suf + targets/target_name/index/stype/ICs/topol.suf + targets/target_name/index/stype/ICs/grompp.suf + targets/target_name/index/stype/ICs/input.suf + targets/target_name/index/stype/ICs/tinker.suf + targets/target_name/index/stype/stype.suf + targets/target_name/index/stype/coords.suf + targets/target_name/index/stype.suf + targets/target_name/stype.suf + + @param[in] index Name of the index directory to look in + @param[in] stype Name of the simulation type to look for + @param[in] sufs List of suffixes to look for in order of priority + @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) """ found = '' - basefnms = [(os.path.join(index, stype, 'ICs', stype+'_'+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', stype+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', ("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', stype), False), - (os.path.join(index, stype, 'ICs', 'coords'), False), - (os.path.join(index, stype, stype), False), - (os.path.join(index, stype, 'coords'), False), - (os.path.join(index, stype), False), - 
(os.path.join(stype), False)] - paths = [] + # The 2-tuple here corresponds to: + # - Search path for the file + # - Whether the file that we're looking for is 'numbered' + # (i.e. a different file for each structure); otherwise the + # single file may contain multiple structures + pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] + + basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx), False), + (os.path.join(index, stype, pfx), False), + (os.path.join(index, pfx), False), + (os.path.join(pfx), False)] for pfx in pfxs])) + + paths = OrderedDict() for fnm, numbered in basefnms: - for crdsfx in self.crdsfx: - fpath = os.path.join(self.tgtdir, fnm+crdsfx) - paths.append(fpath) + for suf in sufs: + fpath = os.path.join(self.tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) + paths[fpath] = os.path.exists(fpath) if os.path.exists(fpath): if found != '': logger.info('Target %s Index %s Simulation %s : ' @@ -610,12 +635,15 @@ def find_ic(self, index, stype, icn): M = Molecule(fpath) if len(M) <= icn: logger.error("Target %s Index %s Simulation %s : " - "initial coordinate file %s doesn't have enough structures\n" % + "file %s doesn't have enough structures\n" % (self.name, index, stype, fpath)) raise RuntimeError logger.info('Target %s Index %s Simulation %s : ' - 'found initial coordinate file %s\n' % (self.name, index, stype, fpath)) + 'found file %s\n' % (self.name, index, stype, fpath)) found = fpath + if found == '': + logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) + raise RuntimeError return found, 0 if numbered else icn def initialize_observables(self): @@ -685,15 +713,16 @@ def initialize_simulations(self): for index in self.Indices: for stype, tsset in SimTS.items(): - if 'n_ic' in self.Data.ix[index]: - 
n_ic = self.Data.ix[index]['n_ic'] + if 'n_ic' in self.Data2.ix[index]: + n_ic = self.Data2.ix[index]['n_ic'] + print n_ic if n_ic < 1: logger.error("n_ic must >= 1") raise RuntimeError else: n_ic = 1 for icn in range(n_ic): - icfnm, icframe = self.find_ic(index, stype, icn) + icfnm, icframe = self.find_file(index, stype, self.crdsfx, icn) sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype self.Simulations[index].append(Simulation(sname, index, stype, icfnm, icframe, sorted(list(tsset)))) @@ -747,6 +776,8 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): cmdlist.append('-T %g' % float(temp)) if pres != None: cmdlist.append('-P %g' % float(pres)) + if AGrad or AHess: + cmdlist.append('-g') cmdstr = ' '.join(cmdlist) print cmdstr # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) From 42eb6a209430231857779ab97143a6fe7ee1208a Mon Sep 17 00:00:00 2001 From: Lee-Ping Date: Tue, 22 Apr 2014 09:50:15 -0700 Subject: [PATCH 16/25] Reduce the amount of printout --- src/target.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/target.py b/src/target.py index cb79b6006..66011cec2 100644 --- a/src/target.py +++ b/src/target.py @@ -715,7 +715,7 @@ def submit_jobs(self, mvals, AGrad=False, AHess=False): wq = getWorkQueue() - logger.info("Sending target '%s' to work queue for remote evaluation\n" % self.name) + # logger.info("Sending target '%s' to work queue for remote evaluation\n" % self.name) # input: # forcebalance.p: pickled mvals, options, and forcefield # rtarget.py: remote target evaluation script @@ -726,7 +726,7 @@ def submit_jobs(self, mvals, AGrad=False, AHess=False): forcebalance.nifty.queue_up(wq, "python rtarget.py > rtarget.out 2>&1", ["forcebalance.p", "rtarget.py", "target.tar.bz2"], ['objective.p', 'indicate.log', 'rtarget.out'], - tgt=self) + tgt=self, verbose=False) def read(self,mvals,AGrad=False,AHess=False): return self.get(mvals, AGrad, AHess) From 
f7ab58546f391e3c07c76c4a552174939c41e710 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 22 Apr 2014 13:32:18 -0700 Subject: [PATCH 17/25] Work in progress --- src/data/md_one.py | 103 ++++++++++++++++++----------------- src/parser.py | 4 +- src/thermo.py | 40 ++++++++++++-- studies/004_thermo/single.in | 2 + 4 files changed, 92 insertions(+), 57 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index b52a87d9b..d0d956531 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -16,7 +16,7 @@ #| Global Imports |# #==================# -import os +import os, sys, re import argparse import numpy as np import importlib as il @@ -37,43 +37,51 @@ # Note: Only the simulation settings that vary across different # simulations in a target may be specified on the command line. +# ANYTHING THREE LETTERS OR BELOW IS A SHORT OPTION WITH ONE DASH! + parser = argparse.ArgumentParser() parser.add_argument('simulation', type=str, help='The simulation name (important; used in setting up)') -parser.add_argument('-T', type=float, default=None, +parser.add_argument('-T', '--temp', '--temperature', dest='temperature', type=float, help='Simulation temperature, leave blank for constant energy') -parser.add_argument('-P', type=float, default=None, +parser.add_argument('-P', '--pres', '--pressure', dest='pressure', type=float, help='Simulation pressure, leave blank for constant volume') -parser.add_argument('-g', action='store_true', +parser.add_argument('-g', '--grad', '--gradient', dest='gradient', action='store_true', help='Calculate gradients for output time series') # These settings may be specified for debugging purposes (i.e. 
they # will override what we read from forcebalance.p) -parser.add_argument('--nequil', type=int, default=0, +parser.add_argument('-eq', '--nequil', dest='nequil', type=int, help='Number of steps for equilibration run (leave blank to use default from forcebalance.p)') -parser.add_argument('--nsteps', type=int, default=0, +parser.add_argument('-md', '--nsteps', dest='nsteps', type=int, help='Number of steps for production run (leave blank to use default from forcebalance.p)') -parser.add_argument('--timestep', type=float, default=0.0, +parser.add_argument('-dt', '--timestep', dest='timestep', type=float, help='Time step in femtoseconds (leave blank to use default from forcebalance.p)') -parser.add_argument('--interval', type=float, default=0.0, +parser.add_argument('-sp', '--sample', dest='sample', type=float, + help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') +parser.add_argument('-nt', '--threads', dest='threads', type=int, help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') -parser.add_argument('--outputs', type=list, nargs='+', +parser.add_argument('-min', '--minimize', dest='minimize', action='store_true', + help='Whether to minimize the energy before starting the simulation') +parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', help='Specify the time series which are written to disk') -args = parser.parse_args() +args = vars(parser.parse_args()) +# args = dict([(i, j) for i, j in vars(parser.parse_args()).items() if j != None]) def main(): """Usage: (prefix.sh) md_one.py - -T - -P - -g (if gradients of output timeseries are desired) - [Debugging Options Below] - --nequil - --nsteps - --outputs + -T, --temperature + -P, --pressure + -g, --grad (if gradients of output timeseries are desired) + -o, --outputs + -eq, --nequil + -md, --nsteps + -dt, --timestep + -nt, --interval This program is meant to be called automatically by ForceBalance because most 
options are loaded from the 'forcebalance.p' input @@ -81,56 +89,50 @@ def main(): """ - printcool("ForceBalance simulation using engine: %s" % engname.upper(), - color=4, bold=True) + # printcool("ForceBalance simulation using engine: %s" % engname.upper(), + # color=4, bold=True) + #---- # Load the ForceBalance pickle file which contains: #---- # - Force field object # - Optimization parameters - # - Options from the Target object that launched this simulation - FF, mvals, TgtOptions = lp_load(open('forcebalance.p')) + # - Options loaded from file + FF, mvals, fopts = lp_load(open('forcebalance.p')) FF.ffdir = '.' # Write the force field file. FF.make(mvals) - # Switch for calculating gradients of output time series. - AGrad = args.g + # # Switch for calculating gradients of output time series. + # AGrad = args['gradient'] #---- - # Load the options that are set in the ForceBalance input file. + # Load some options from file #---- # Finite difference step size - h = TgtOptions['h'] + h = fopts['h'] # Active parameters for gradient (if we filtered out the # parameters that are known to have no effect) - pgrad = TgtOptions['pgrad'] - # MD options; time step (fs), production steps, equilibration steps, interval for saving data (ps) - timestep = args.timestep if args.timestep > 0 else SimOptions['timestep'] - nsteps = args.nsteps if args.nsteps > 0 else SimOptions['nsteps'] - nequil = args.nequil if args.nequil > 0 else SimOptions['nequil'] - intvl = args.intvl if args.intvl > 0 else SimOptions['interval'] - fnm = SimOptions['coords'] - if not fnm.startswith(args.simulation): - logger.error("Problem with SimOptions['coords'] (%s):\n" % fnm) - logger.error("Coordinate file must be consistent with simulation type (%s)\n" % args.simulation) + pgrad = fopts['pgrad'] - # Number of threads, multiple timestep integrator, anisotropic box etc. 
- threads = SimOptions.get('md_threads', 1) - mts = SimOptions.get('mts_integrator', 0) - rpmd_beads = SimOptions.get('rpmd_beads', 0) - force_cuda = SimOptions.get('force_cuda', 0) - nbarostat = SimOptions.get('n_mcbarostat', 25) - anisotropic = SimOptions.get('anisotropic_box', 0) - minimize = SimOptions.get('minimize_energy', 1) + printcool_dictionary(args) - + # Number of threads, multiple timestep integrator, anisotropic box etc. + threads = fopts.get('md_threads', 1) + mts = fopts.get('mts_integrator', 0) + rpmd_beads = fopts.get('rpmd_beads', 0) + force_cuda = fopts.get('force_cuda', 0) + nbarostat = fopts.get('n_mcbarostat', 25) + anisotropic = fopts.get('anisotropic_box', 0) + minimize = fopts.get('minimize_energy', 1) #---- # Setting up MD simulations #---- + EngOpts = OrderedDict() + EngOpts = OrderedDict([("coords", fopts['coords']), ("pbc", False)]) + EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) - EngOpts["gas"] = OrderedDict([("coords", gas_fnm), ("mol", MG), ("pbc", False)]) GenOpts = OrderedDict([('FF', FF)]) if engname == "openmm": # OpenMM-specific options @@ -142,8 +144,8 @@ def main(): if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") elif engname == "gromacs": # Gromacs-specific options - GenOpts["gmxpath"] = TgtOptions["gmxpath"] - GenOpts["gmxsuffix"] = TgtOptions["gmxsuffix"] + GenOpts["gmxpath"] = fopts["gmxpath"] + GenOpts["gmxsuffix"] = fopts["gmxsuffix"] EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" @@ -154,7 +156,7 @@ def main(): if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") elif engname == "tinker": # Tinker-specific options - GenOpts["tinkerpath"] = TgtOptions["tinkerpath"] + GenOpts["tinkerpath"] = fopts["tinkerpath"] 
EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" EngOpts["gas"]["tinker_key"] = os.path.splitext(gas_fnm)[0] + ".key" if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") @@ -166,12 +168,13 @@ def main(): printcool_dictionary(EngOpts[i], "Engine options for %s" % i) # Set up MD options - MDOpts = OrderedDict() + # These are used in the function call to molecular_dynamics() + MDOpts["liquid"] = OrderedDict([("nsteps", liquid_nsteps), ("timestep", liquid_timestep), ("temperature", temperature), ("pressure", pressure), ("nequil", liquid_nequil), ("minimize", minimize), ("nsave", int(1000 * liquid_intvl / liquid_timestep)), - ("verbose", True), ('save_traj', TgtOptions['save_traj']), + ("verbose", True), ('save_traj', fopts['save_traj']), ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), diff --git a/src/parser.py b/src/parser.py index f85ef4fb8..b424ae7dc 100644 --- a/src/parser.py +++ b/src/parser.py @@ -123,7 +123,7 @@ "adaptive_damping" : (0.5, 10, 'Damping factor that ties down the trust radius to trust0; decrease for a more variable step size.', 'Main Optimizer'), "error_tolerance" : (0.0, 10, 'Error tolerance; the optimizer will only reject steps that increase the objective function by more than this number.', 'Main Optimizer'), "search_tolerance" : (1e-4, -10, 'Search tolerance; used only when trust radius is negative, dictates convergence threshold of nonlinear search.', 'Main Optimizer with negative mintrust; advanced usage'), - "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', ['OPENMM','TINKER']) + "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', 
['OPENMM','TINKER']), }, 'sections': {"read_mvals" : (None, 100, 'Paste mathematical parameters into the input file for them to be read in directly', 'Restarting an optimization'), "read_pvals" : (None, 100, 'Paste physical parameters into the input file for them to be read in directly', 'Restarting an optimization (recommend use_mvals instead)'), @@ -239,6 +239,8 @@ "self_pol_mu0" : (0.0, -150, 'Gas-phase dipole parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'), "self_pol_alpha" : (0.0, -150, 'Polarizability parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'), "epsgrad" : (0.0, -150, 'Gradient below this threshold will be set to zero.', 'All targets'), + "timestep" : (1.0, 0, 'Time step for molecular dynamics (in femtoseconds).', 'Thermodynamic property targets', 'thermo'), + "interval" : (1.0, 0, 'Sampling interval for molecular dynamics (in picoseconds).', 'Thermodynamic property targets', 'thermo'), }, 'sections': {} } diff --git a/src/thermo.py b/src/thermo.py index 71bb681ea..8736bf1e6 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -369,9 +369,13 @@ def __init__(self, options, tgt_opts, forcefield): # Length of simulation chain self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run - self.set_option(tgt_opts, "eq_steps", forceprint=True) + self.set_option(tgt_opts, "eq_steps", "nequil", forceprint=True) # Number of time steps in the production run - self.set_option(tgt_opts, "md_steps", forceprint=True) + self.set_option(tgt_opts, "md_steps", "nsteps", forceprint=True) + # Time step (in femtoseconds) + self.set_option(tgt_opts, "timestep", forceprint=True) + # Sampling interval (in picoseconds) + self.set_option(tgt_opts, "interval", "sample", forceprint=True) ## Variables # Prefix names for simulation data @@ -384,6 +388,8 @@ def __init__(self, options, tgt_opts, forcefield): 
self.weights = {} # The list of simulations that we'll be running. self.SimNames = [i.lower() for i in self.user_simulation_names] + # Store the dictionary of allowed suffixes + self.OptionDict['crdsfx'] = self.crdsfx ## Read source data and initialize points; creates self.Data, self.Indices and self.Columns objects. self.read_source(os.path.join(self.root, self.tgtdir, self.source)) @@ -762,7 +768,6 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # Submit or run the simulation if the result file does not exist. if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): # Write to disk: Force field object, current parameter values, target options - with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,self.OptionDict),f) M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] M.write("%s%s" % (Sim.type, self.crdsfx[0])) # # Get relevant files from the target folder, I suppose. @@ -771,20 +776,43 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) # Command for running the simulation. + ## Copy run scripts from ForceBalance installation directory + + # We can build the entire MD options dictionary here!! + # Update dictionary with simulation options. 
+ OptionDict = copy.deepcopy(self.OptionDict) + OptionDict['gradient'] = AGrad + OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) + OptionDict['simtype'] = Sim.type + # # In the future we should have these settings + # OptionDict['nequil'] = self.nequil + # OptionDict['nsteps'] = self.nsteps + # OptionDict['timestep'] = self.timestep + # OptionDict['sample'] = self.sample + # OptionDict['minimize'] = self.minimize + printcool_dictionary(OptionDict) + + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,OptionDict),f) + for f in self.scripts: + LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), + os.path.join(os.getcwd(), f)) cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.type)] + #cmdlist.append('-eq %i -md %i -dt %g -sp %g' % (self.nequil, self.nsteps, self.timestep, self.sample)) if temp != None: cmdlist.append('-T %g' % float(temp)) if pres != None: cmdlist.append('-P %g' % float(pres)) - if AGrad or AHess: - cmdlist.append('-g') + # if AGrad or AHess: + # cmdlist.append('-g') + # cmdlist.append('-o') + # cmdlist += Sim.timeseries.keys() cmdstr = ' '.join(cmdlist) print cmdstr # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) # if wq == None: # logger.info("Running condensed phase simulation locally.\n") # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - # _exec(cmdstr, copy_stderr=True, outfnm='npt.out') + _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') # else: # queue_up(wq, command = cmdstr+' &> npt.out', # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], diff --git a/studies/004_thermo/single.in b/studies/004_thermo/single.in index 9fb288210..1ec7dbde5 100644 --- a/studies/004_thermo/single.in +++ b/studies/004_thermo/single.in @@ -71,4 +71,6 @@ quantities density h_vap simulations liquid gas md_steps 100000 eq_steps 50000 +interval 0.1 +timestep 2.0 $end From 88b1f15ca3f60b0c1e40fd4d84eefb5fb3db6faf Mon Sep 17 00:00:00 2001 From: 
leeping Date: Tue, 22 Apr 2014 17:28:48 -0700 Subject: [PATCH 18/25] Added simulation.py which contains simulation class (container for simulation settings) --- src/data/md_one.py | 105 ++++++++++++++++------ src/gmxio.py | 2 + src/simulation.py | 76 ++++++++++++++++ src/thermo.py | 213 +++++++++++++++++++++------------------------ 4 files changed, 255 insertions(+), 141 deletions(-) create mode 100644 src/simulation.py diff --git a/src/data/md_one.py b/src/data/md_one.py index d0d956531..9da428e84 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -66,8 +66,7 @@ parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', help='Specify the time series which are written to disk') -args = vars(parser.parse_args()) -# args = dict([(i, j) for i, j in vars(parser.parse_args()).items() if j != None]) +Copts = vars(parser.parse_args()) def main(): @@ -98,39 +97,93 @@ def main(): # - Force field object # - Optimization parameters # - Options loaded from file - FF, mvals, fopts = lp_load(open('forcebalance.p')) + FF, mvals, Fopts = lp_load(open('forcebalance.p')) FF.ffdir = '.' # Write the force field file. FF.make(mvals) - # # Switch for calculating gradients of output time series. - # AGrad = args['gradient'] + printcool_dictionary(Copts, title="Options from command line") + printcool_dictionary(Fopts, title="Options from file") + + # Read the command line options (they can override the options from file.) + # Calculate energy / dipole derivatives. + AGrad = Copts['gradient'] or Fopts['gradient'] + # Whether to minimize the energy. + minimize = Copts['minimize'] or Fopts['minimize'] + # Engine name. + engname = Fopts['engname'] + # + threads = Copts.get('threads', Fopts.get('threads', 1)) + + # # Get the temperature. + # temperature = Copts.get('temperature', Fopts.get('temperature', None)) + # # Get the pressure. 
+ # pressure = Copts.get('pressure', Fopts.get('pressure', None)) + # # + # nequil = Copts.get('nequil', Fopts.get('nequil')) + #---- - # Load some options from file + # load some options from file #---- # Finite difference step size - h = fopts['h'] - # Active parameters for gradient (if we filtered out the - # parameters that are known to have no effect) - pgrad = fopts['pgrad'] + h = Fopts['h'] + # Active parameters for taking the gradient + pgrad = Fopts['pgrad'] + # Name of the initial coordinate file + coords = Fopts['coords'] + # Base name of the initial coordinate file + cbase = os.path.splitext(coords)[0] + # Actually start to do stuff. + # Molecule object corresponding to + M = Molecule(coords) + + #---- + # Engine options + #---- + EngOpts = OrderedDict([("coords", coords), ("pbc", Fopts['pbc'])]) + if engname == "openmm": + if pbc: + EngOpts["platname"] = 'CUDA' + else: + EngOpts["platname"] = 'Reference' + # Force crash if asking for the CUDA platform and force_cuda option is on + # (because we don't want to inadvertently run using Reference platform) + if EngOpts["platname"] == 'CUDA' and Fopts['force_cuda']: + try: Platform.getPlatformByName('CUDA') + except: raise RuntimeError('Forcing failure because CUDA platform unavailable') + if threads > 1: + logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") + elif engname == "gromacs": + # Gromacs-specific options + EngOpts["gmxpath"] = Fopts["gmxpath"] + EngOpts["gmxsuffix"] = Fopts["gmxsuffix"] + EngOpts["gmx_top"] = Fopts["gmx_top"] + EngOpts["gmx_mdp"] = Fopts["gmx_mdp"] + if Fopts['force_cuda']: logger.warn("force_cuda option has no effect on Gromacs engine.") + if Fopts['rpmd_beads'] > 0: raise RuntimeError("Gromacs cannot handle RPMD.") + if Fopts['mts']: logger.warn("Gromacs not configured for multiple timestep integrator.") + if Fopts['anisotropic']: logger.warn("Gromacs not configured for anisotropic box scaling.") + elif engname == "tinker": + 
EngOpts["tinkerpath"] = Fopts["tinkerpath"] + EngOpts["tinker_key"] = Fopts["tinker_key"] - printcool_dictionary(args) + # if Fopts['threads'] > 1: + printcool_dictionary(EngOpts, title="Engine options") # Number of threads, multiple timestep integrator, anisotropic box etc. - threads = fopts.get('md_threads', 1) - mts = fopts.get('mts_integrator', 0) - rpmd_beads = fopts.get('rpmd_beads', 0) - force_cuda = fopts.get('force_cuda', 0) - nbarostat = fopts.get('n_mcbarostat', 25) - anisotropic = fopts.get('anisotropic_box', 0) - minimize = fopts.get('minimize_energy', 1) - + # threads = Fopts.get('md_threads', 1) + # mts = Fopts.get('mts_integrator', 0) + # rpmd_beads = Fopts.get('rpmd_beads', 0) + # force_cuda = Fopts.get('force_cuda', 0) + # nbarostat = Fopts.get('n_mcbarostat', 25) + # anisotropic = Fopts.get('anisotropic_box', 0) + # minimize = Fopts.get('minimize_energy', 1) + sys.exit() + #---- # Setting up MD simulations #---- - - EngOpts = OrderedDict() - EngOpts = OrderedDict([("coords", fopts['coords']), ("pbc", False)]) + EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) GenOpts = OrderedDict([('FF', FF)]) @@ -144,8 +197,8 @@ def main(): if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") elif engname == "gromacs": # Gromacs-specific options - GenOpts["gmxpath"] = fopts["gmxpath"] - GenOpts["gmxsuffix"] = fopts["gmxsuffix"] + GenOpts["gmxpath"] = Fopts["gmxpath"] + GenOpts["gmxsuffix"] = Fopts["gmxsuffix"] EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" @@ -156,7 +209,7 @@ def main(): if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") elif engname == "tinker": # Tinker-specific options - GenOpts["tinkerpath"] = fopts["tinkerpath"] + GenOpts["tinkerpath"] = Fopts["tinkerpath"] 
EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" EngOpts["gas"]["tinker_key"] = os.path.splitext(gas_fnm)[0] + ".key" if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") @@ -174,7 +227,7 @@ def main(): ("temperature", temperature), ("pressure", pressure), ("nequil", liquid_nequil), ("minimize", minimize), ("nsave", int(1000 * liquid_intvl / liquid_timestep)), - ("verbose", True), ('save_traj', fopts['save_traj']), + ("verbose", True), ('save_traj', Fopts['save_traj']), ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), diff --git a/src/gmxio.py b/src/gmxio.py index bcd0525a8..b43dc205c 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1497,6 +1497,8 @@ def __init__(self,options,tgt_opts,forcefield): self.engname = "gromacs" # Valid coordinate suffix. self.crdsfx = ['.gro', '.pdb'] + # Auxiliary (e.g. topology) files. + self.auxsfx = [['.mdp'], ['.top']] # Command prefix. self.mdpfx = "bash gmxprefix.bash" # Scripts to be copied from the ForceBalance installation directory. diff --git a/src/simulation.py b/src/simulation.py new file mode 100644 index 000000000..7bd52ae91 --- /dev/null +++ b/src/simulation.py @@ -0,0 +1,76 @@ +import os +from forcebalance.molecule import Molecule +from collections import OrderedDict + +class Simulation(object): + + """ + Data container for a MD simulation (specified by index, simulation + type, initial condition). These settings are written to a file + then passed to md_one.py. + + The Simulation object is passed between the master ForceBalance + process and the remote script (e.g. md_one.py). 
+ """ + + type_settings = {'gas': {'pbc' : 0}, + 'liquid': {'pbc' : 1}, + 'solid': {'pbc' : 1, 'anisotropic_box' : 1}, + 'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}} + + def __init__(self, target, name, index, stype, initial, iframe, tsnames): + print target.root, target.tgtdir + raw_input() + # The simulation name will identify the simulation within a collection + # belonging to the Index. + self.name = name + # The Index that the simulation belongs to. + self.index = index + # The type of simulation (liquid, gas, solid, bilayer...) + if stype not in Simulation.type_settings.keys(): + logger.error('Simulation type %s is not supported at this time') + raise RuntimeError + self.type = stype + # The file containing initial coordinates. + self.initial = initial + # The frame number in the initial coordinate file. + self.iframe = iframe + # The time series for the simulation. + self.timeseries = OrderedDict([(i, []) for i in tsnames]) + # The file extension that the coordinate file will be written with. + self.fext = os.path.splitext(initial)[1] + # The file name of the coordinate file. + self.coords = "%s%s" % (self.type, self.fext) + # The number of threads for this simulation. + self.threads = target.OptionDict.get('md_threads', 1) + # Whether to use multiple timestep integrator. + self.mts = target.OptionDict.get('mts_integrator', 0) + # The number of beads in an RPMD simulation. + self.rpmd_beads = target.OptionDict.get('rpmd_beads', 0) + # Whether to use the CUDA platform (OpenMM only). + self.force_cuda = target.OptionDict.get('force_cuda', 0) + # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only). + self.nbarostat = target.OptionDict.get('n_mcbarostat', 25) + # Flag for anisotropic simulation cell. + self.anisotropic = target.OptionDict.get('anisotropic_box', 0) + # Flag for minimizing the energy. + self.minimize = target.OptionDict.get('minimize_energy', 0) + # Finite difference step size. 
+ self.h = target.h + # Name of the simulation engine. + self.engname = target.engname + # Whether to use periodic boundary conditions. + self.pbc = Simulation.type_settings[self.type]['pbc'] + # Gromacs-specific options. + if self.engname == 'gromacs': + self.gmxpath = target.gmxpath + self.gmxsuffix = target.gmxsuffix + elif self.engname == 'tinker': + self.tinkerpath = target.tinkerpath + + def __str__(self): + msg = [] + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) diff --git a/src/thermo.py b/src/thermo.py index 8736bf1e6..19f66a43c 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -9,6 +9,7 @@ import cStringIO from forcebalance.molecule import Molecule +from forcebalance.simulation import Simulation from forcebalance.observable import OMap from forcebalance.target import Target from forcebalance.finite_difference import in_fd @@ -350,6 +351,81 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs +def find_file(tgtdir, index, stype, sufs, icn): + """ + Search for a suitable file that matches the simulation index, + type, suffix and IC number. This can be used to search for + initial coordinates, but also auxiliary files for the + simulation (e.g. .top and .mdp files for a Gromacs simulation, + or .key files for a Tinker simulation.) + + Generally, it is preferred to provide files where the base + name matches the simulation type. However, since it is also + okay to put all files for a simulation type into a + subdirectory, generic file names like 'topol' and 'conf' may + be used. 
+ + Initial condition files will be searched for in the following priority (suf stands for suffix) + targets/target_name/index/stype/ICs/stype_#.suf + targets/target_name/index/stype/ICs/stype#.suf + targets/target_name/index/stype/ICs/#.suf + targets/target_name/index/stype/ICs/stype.suf + targets/target_name/index/stype/ICs/coords.suf + targets/target_name/index/stype/ICs/conf.suf + targets/target_name/index/stype/ICs/topol.suf + targets/target_name/index/stype/ICs/grompp.suf + targets/target_name/index/stype/ICs/input.suf + targets/target_name/index/stype/ICs/tinker.suf + targets/target_name/index/stype/stype.suf + targets/target_name/index/stype/coords.suf + targets/target_name/index/stype.suf + targets/target_name/stype.suf + + @param[in] index Name of the index directory to look in + @param[in] stype Name of the simulation type to look for + @param[in] sufs List of suffixes to look for in order of priority + @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) + """ + found = '' + # The 2-tuple here corresponds to: + # - Search path for the file + # - Whether the file that we're looking for is 'numbered' + # (i.e. 
a different file for each structure); otherwise the + # single file may contain multiple structures + pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] + + basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx), False), + (os.path.join(index, stype, pfx), False), + (os.path.join(index, pfx), False), + (os.path.join(pfx), False)] for pfx in pfxs])) + + paths = OrderedDict() + for fnm, numbered in basefnms: + for suf in sufs: + fpath = os.path.join(tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) + paths[fpath] = os.path.exists(fpath) + if os.path.exists(fpath): + if found != '': + logger.info('Target %s Index %s Simulation %s : ' + '%s overrides %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) + else: + if not numbered: + M = Molecule(fpath) + if len(M) <= icn: + logger.error("Target %s Index %s Simulation %s : " + "file %s doesn't have enough structures\n" % + (os.path.basename(tgtdir), index, stype, fpath)) + raise RuntimeError + logger.info('Target %s Index %s Simulation %s : ' + 'found file %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) + found = fpath + if found == '': + logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) + raise RuntimeError + return found, 0 if numbered else icn + class Thermo(Target): """ A target for fitting general experimental data sets. The source @@ -577,81 +653,6 @@ def floatcol(col): return - def find_file(self, index, stype, sufs, icn): - """ - Search for a suitable file that matches the simulation index, - type, suffix and IC number. This can be used to search for - initial coordinates, but also auxiliary files for the - simulation (e.g. .top and .mdp files for a Gromacs simulation, - or .key files for a Tinker simulation.) 
- - Generally, it is preferred to provide files where the base - name matches the simulation type. However, since it is also - okay to put all files for a simulation type into a - subdirectory, generic file names like 'topol' and 'conf' may - be used. - - Initial condition files will be searched for in the following priority (suf stands for suffix) - targets/target_name/index/stype/ICs/stype_#.suf - targets/target_name/index/stype/ICs/stype#.suf - targets/target_name/index/stype/ICs/#.suf - targets/target_name/index/stype/ICs/stype.suf - targets/target_name/index/stype/ICs/coords.suf - targets/target_name/index/stype/ICs/conf.suf - targets/target_name/index/stype/ICs/topol.suf - targets/target_name/index/stype/ICs/grompp.suf - targets/target_name/index/stype/ICs/input.suf - targets/target_name/index/stype/ICs/tinker.suf - targets/target_name/index/stype/stype.suf - targets/target_name/index/stype/coords.suf - targets/target_name/index/stype.suf - targets/target_name/stype.suf - - @param[in] index Name of the index directory to look in - @param[in] stype Name of the simulation type to look for - @param[in] sufs List of suffixes to look for in order of priority - @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) - """ - found = '' - # The 2-tuple here corresponds to: - # - Search path for the file - # - Whether the file that we're looking for is 'numbered' - # (i.e. 
a different file for each structure); otherwise the - # single file may contain multiple structures - pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] - - basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), - (os.path.join(index, stype, 'ICs', pfx), False), - (os.path.join(index, stype, pfx), False), - (os.path.join(index, pfx), False), - (os.path.join(pfx), False)] for pfx in pfxs])) - - paths = OrderedDict() - for fnm, numbered in basefnms: - for suf in sufs: - fpath = os.path.join(self.tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) - paths[fpath] = os.path.exists(fpath) - if os.path.exists(fpath): - if found != '': - logger.info('Target %s Index %s Simulation %s : ' - '%s overrides %s\n' % (self.name, index, stype, fpath)) - else: - if not numbered: - M = Molecule(fpath) - if len(M) <= icn: - logger.error("Target %s Index %s Simulation %s : " - "file %s doesn't have enough structures\n" % - (self.name, index, stype, fpath)) - raise RuntimeError - logger.info('Target %s Index %s Simulation %s : ' - 'found file %s\n' % (self.name, index, stype, fpath)) - found = fpath - if found == '': - logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) - raise RuntimeError - return found, 0 if numbered else icn - def initialize_observables(self): """ Determine Observable objects to be created. 
Checks to see @@ -728,10 +729,9 @@ def initialize_simulations(self): else: n_ic = 1 for icn in range(n_ic): - icfnm, icframe = self.find_file(index, stype, self.crdsfx, icn) + icfnm, icframe = find_file(self.tgtdir, index, stype, self.crdsfx, icn) sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype - self.Simulations[index].append(Simulation(sname, index, stype, icfnm, icframe, sorted(list(tsset)))) - + self.Simulations[index].append(Simulation(self, sname, index, stype, icfnm, icframe, sorted(list(tsset)))) return def submit_jobs(self, mvals, AGrad=True, AHess=True): @@ -769,7 +769,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): # Write to disk: Force field object, current parameter values, target options M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] - M.write("%s%s" % (Sim.type, self.crdsfx[0])) + M.write(Sim.coords) # # Get relevant files from the target folder, I suppose. # link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) # # Determine initial coordinates. @@ -777,22 +777,33 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) # Command for running the simulation. ## Copy run scripts from ForceBalance installation directory - # We can build the entire MD options dictionary here!! # Update dictionary with simulation options. 
- OptionDict = copy.deepcopy(self.OptionDict) - OptionDict['gradient'] = AGrad - OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) - OptionDict['simtype'] = Sim.type + # OptionDict = copy.deepcopy(self.OptionDict) + # OptionDict['gradient'] = AGrad + # Sim.gradient = AGrad + # Sim.nequil = self.nequil + # Sim.nsteps = self.nsteps + # Sim.timestep = self.timestep + # Sim.sample = self.sample + # Sim.h = + # Sim.pgrad = + # OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) + # OptionDict.update(vars(Sim)) + # OptionDict['simtype'] = Sim.type # # In the future we should have these settings # OptionDict['nequil'] = self.nequil # OptionDict['nsteps'] = self.nsteps # OptionDict['timestep'] = self.timestep # OptionDict['sample'] = self.sample # OptionDict['minimize'] = self.minimize - printcool_dictionary(OptionDict) + # printcool_dictionary(vars(Sim)) + # SimOpts = dict(vars(Sim)) + Opts = vars(Sim) + Opts['gradient'] = AGrad + Opts['pgrad'] = self.pgrad - with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,OptionDict),f) + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Opts),f) for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(os.getcwd(), f)) @@ -812,7 +823,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # if wq == None: # logger.info("Running condensed phase simulation locally.\n") # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') + _exec(cmdstr, copy_stderr=False, outfnm='md_one.out') # else: # queue_up(wq, command = cmdstr+' &> npt.out', # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], @@ -1095,31 +1106,3 @@ def __str__(self): return "\n".join(msg) -class Simulation(object): - - """ - Data container for a simulation (specified by index, simulation - type, initial condition). 
- """ - - def __init__(self, name, index, stype, initial, iframe, tsnames): - # The simulation name will identify the simulation within a collection - # belonging to the Index. - self.name = name - # The Index that the simulation belongs to. - self.index = index - # The type of simulation (liquid, gas, solid, bilayer...) - self.type = stype - # The file containing initial coordinates. - self.initial = initial - # The frame number in the initial coordinate file. - self.iframe = iframe - # The time series for the simulation. - self.timeseries = OrderedDict([(i, []) for i in tsnames]) - - def __str__(self): - msg = [] - msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) - msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) - msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) - return "\n".join(msg) From 0cb3e61c8b7b66abe872f0387bbf0b2a5ddabc69 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 22 Apr 2014 22:24:44 -0700 Subject: [PATCH 19/25] md_one.py creates Engine object. --- src/data/md_one.py | 92 +++++--- src/simulation.py | 76 ------- src/thermo.py | 202 ++++++++++++++---- .../targets/LiquidBromine/1/gas.mdp | 14 +- 4 files changed, 226 insertions(+), 158 deletions(-) delete mode 100644 src/simulation.py diff --git a/src/data/md_one.py b/src/data/md_one.py index 9da428e84..ff1763179 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -36,9 +36,6 @@ # Note: Only the simulation settings that vary across different # simulations in a target may be specified on the command line. - -# ANYTHING THREE LETTERS OR BELOW IS A SHORT OPTION WITH ONE DASH! 
- parser = argparse.ArgumentParser() parser.add_argument('simulation', type=str, help='The simulation name (important; used in setting up)') @@ -66,7 +63,41 @@ parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', help='Specify the time series which are written to disk') -Copts = vars(parser.parse_args()) +# Parse the command line options and save as a dictionary (don't save NoneTypes) +parsed = parser.parse_args() +args = OrderedDict([(i, j) for i, j in vars(parsed).items() if j != None]) + +#---- +# Load the ForceBalance pickle file which contains: +#---- +# - Force field object +# - Optimization parameters +# - Options loaded from file +FF, mvals, Sim = lp_load(open('forcebalance.p')) +FF.ffdir = '.' + +# Engine name. +engname = Sim.engname + +# Import modules and create the correct Engine object. +if engname == "openmm": + try: + from simtk.unit import * + from simtk.openmm import * + from simtk.openmm.app import * + except: + traceback.print_exc() + raise Exception("Cannot import OpenMM modules") + from forcebalance.openmmio import * + Engine = OpenMM +elif engname == "gromacs" or engname == "gmx": + from forcebalance.gmxio import * + Engine = GMX +elif engname == "tinker": + from forcebalance.tinkerio import * + Engine = TINKER +else: + raise Exception('OpenMM, GROMACS, and TINKER are supported at this time.') def main(): @@ -88,39 +119,44 @@ def main(): """ - # printcool("ForceBalance simulation using engine: %s" % engname.upper(), - # color=4, bold=True) + # Write the force field file. + FF.make(mvals) + + # Read the command line options (they may override the options from file.) + AGrad = args['gradient'] or Sim.gradient + for i in ['temperature', 'pressure', 'nequil', 'nsteps', 'timestep', 'sample', 'threads', 'minimize']: + if i in args: + Sim.MDOpts[i] = args[i] #---- - # Load the ForceBalance pickle file which contains: + # Print some options. + # At this point, engine and MD options should be SET! 
#---- - # - Force field object - # - Optimization parameters - # - Options loaded from file - FF, mvals, Fopts = lp_load(open('forcebalance.p')) - FF.ffdir = '.' - # Write the force field file. - FF.make(mvals) + printcool("ForceBalance simulation using engine: %s" % engname.upper(), + color=4, bold=True) + printcool_dictionary(args, title="Options from command line") + printcool_dictionary(Sim.EngOpts, title="Engine options") + printcool_dictionary(Sim.MDOpts, title="Molecular dynamics options") + + #---- + # For convenience, assign some local variables. + #---- + # Finite difference step size + h = Sim.h + # Active parameters to differentiate + pgrad = Sim.pgrad - printcool_dictionary(Copts, title="Options from command line") - printcool_dictionary(Fopts, title="Options from file") + # Create instances of the MD Engine objects. + MDEngine = Engine(name=Sim.type, **Sim.EngOpts) - # Read the command line options (they can override the options from file.) - # Calculate energy / dipole derivatives. - AGrad = Copts['gradient'] or Fopts['gradient'] - # Whether to minimize the energy. - minimize = Copts['minimize'] or Fopts['minimize'] - # Engine name. - engname = Fopts['engname'] - # - threads = Copts.get('threads', Fopts.get('threads', 1)) + sys.exit() # # Get the temperature. - # temperature = Copts.get('temperature', Fopts.get('temperature', None)) + # temperature = args.get('temperature', Fopts.get('temperature', None)) # # Get the pressure. 
- # pressure = Copts.get('pressure', Fopts.get('pressure', None)) + # pressure = args.get('pressure', Fopts.get('pressure', None)) # # - # nequil = Copts.get('nequil', Fopts.get('nequil')) + # nequil = args.get('nequil', Fopts.get('nequil')) #---- # load some options from file diff --git a/src/simulation.py b/src/simulation.py deleted file mode 100644 index 7bd52ae91..000000000 --- a/src/simulation.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -from forcebalance.molecule import Molecule -from collections import OrderedDict - -class Simulation(object): - - """ - Data container for a MD simulation (specified by index, simulation - type, initial condition). These settings are written to a file - then passed to md_one.py. - - The Simulation object is passed between the master ForceBalance - process and the remote script (e.g. md_one.py). - """ - - type_settings = {'gas': {'pbc' : 0}, - 'liquid': {'pbc' : 1}, - 'solid': {'pbc' : 1, 'anisotropic_box' : 1}, - 'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}} - - def __init__(self, target, name, index, stype, initial, iframe, tsnames): - print target.root, target.tgtdir - raw_input() - # The simulation name will identify the simulation within a collection - # belonging to the Index. - self.name = name - # The Index that the simulation belongs to. - self.index = index - # The type of simulation (liquid, gas, solid, bilayer...) - if stype not in Simulation.type_settings.keys(): - logger.error('Simulation type %s is not supported at this time') - raise RuntimeError - self.type = stype - # The file containing initial coordinates. - self.initial = initial - # The frame number in the initial coordinate file. - self.iframe = iframe - # The time series for the simulation. - self.timeseries = OrderedDict([(i, []) for i in tsnames]) - # The file extension that the coordinate file will be written with. - self.fext = os.path.splitext(initial)[1] - # The file name of the coordinate file. 
- self.coords = "%s%s" % (self.type, self.fext) - # The number of threads for this simulation. - self.threads = target.OptionDict.get('md_threads', 1) - # Whether to use multiple timestep integrator. - self.mts = target.OptionDict.get('mts_integrator', 0) - # The number of beads in an RPMD simulation. - self.rpmd_beads = target.OptionDict.get('rpmd_beads', 0) - # Whether to use the CUDA platform (OpenMM only). - self.force_cuda = target.OptionDict.get('force_cuda', 0) - # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only). - self.nbarostat = target.OptionDict.get('n_mcbarostat', 25) - # Flag for anisotropic simulation cell. - self.anisotropic = target.OptionDict.get('anisotropic_box', 0) - # Flag for minimizing the energy. - self.minimize = target.OptionDict.get('minimize_energy', 0) - # Finite difference step size. - self.h = target.h - # Name of the simulation engine. - self.engname = target.engname - # Whether to use periodic boundary conditions. - self.pbc = Simulation.type_settings[self.type]['pbc'] - # Gromacs-specific options. 
- if self.engname == 'gromacs': - self.gmxpath = target.gmxpath - self.gmxsuffix = target.gmxsuffix - elif self.engname == 'tinker': - self.tinkerpath = target.tinkerpath - - def __str__(self): - msg = [] - msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) - msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) - msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) - return "\n".join(msg) diff --git a/src/thermo.py b/src/thermo.py index 19f66a43c..56cdf7dd5 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -3,13 +3,13 @@ import csv import copy import errno +import shutil import numpy as np import pandas as pd import itertools import cStringIO from forcebalance.molecule import Molecule -from forcebalance.simulation import Simulation from forcebalance.observable import OMap from forcebalance.target import Target from forcebalance.finite_difference import in_fd @@ -27,6 +27,14 @@ # print logger.parent.parent.handlers[0] # logger.parent.parent.handlers = [] +def getval(dframe, col): + """ Extract the single non-NaN value from a column. """ + nnan = [i for i in dframe[col] if not isnpnan(i)] + if len(nnan) != 1: + logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) + raise RuntimeError + return nnan[0] + class TextParser(object): """ Parse a text file. """ def __init__(self, fnm): @@ -351,7 +359,7 @@ def stand_head(head, obs): logger.debug("header %s renamed to %s\n" % (hfirst, newh)) return newh, punit, obs -def find_file(tgtdir, index, stype, sufs, icn): +def find_file(tgtdir, index, stype, sufs, iscrd, icn=0): """ Search for a suitable file that matches the simulation index, type, suffix and IC number. 
This can be used to search for @@ -381,9 +389,11 @@ def find_file(tgtdir, index, stype, sufs, icn): targets/target_name/index/stype.suf targets/target_name/stype.suf - @param[in] index Name of the index directory to look in + @param[in] tgtdir Name of the target directory to look in + @param[in] index Name of the index directory to look in (within tgtdir) @param[in] stype Name of the simulation type to look for @param[in] sufs List of suffixes to look for in order of priority + @param[in] iscrd Whether the file is a coordinate file (false for auxiliary files like .mdp). @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) """ found = '' @@ -411,7 +421,7 @@ def find_file(tgtdir, index, stype, sufs, icn): logger.info('Target %s Index %s Simulation %s : ' '%s overrides %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) else: - if not numbered: + if iscrd and not numbered: M = Molecule(fpath) if len(M) <= icn: logger.error("Target %s Index %s Simulation %s : " @@ -420,7 +430,7 @@ def find_file(tgtdir, index, stype, sufs, icn): raise RuntimeError logger.info('Target %s Index %s Simulation %s : ' 'found file %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) - found = fpath + found = os.path.abspath(fpath) if found == '': logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) raise RuntimeError @@ -445,13 +455,15 @@ def __init__(self, options, tgt_opts, forcefield): # Length of simulation chain self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run - self.set_option(tgt_opts, "eq_steps", "nequil", forceprint=True) + self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run - self.set_option(tgt_opts, "md_steps", "nsteps", forceprint=True) + self.set_option(tgt_opts, "md_steps", forceprint=True) # Time 
step (in femtoseconds) self.set_option(tgt_opts, "timestep", forceprint=True) # Sampling interval (in picoseconds) - self.set_option(tgt_opts, "interval", "sample", forceprint=True) + self.set_option(tgt_opts, "interval", forceprint=True) + # Save trajectories? + self.set_option(tgt_opts, "save_traj", forceprint=True) ## Variables # Prefix names for simulation data @@ -729,9 +741,8 @@ def initialize_simulations(self): else: n_ic = 1 for icn in range(n_ic): - icfnm, icframe = find_file(self.tgtdir, index, stype, self.crdsfx, icn) sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype - self.Simulations[index].append(Simulation(self, sname, index, stype, icfnm, icframe, sorted(list(tsset)))) + self.Simulations[index].append(Simulation(self, self.Data.ix[index], sname, index, stype, icn, sorted(list(tsset)))) return def submit_jobs(self, mvals, AGrad=True, AHess=True): @@ -763,52 +774,25 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): temp = self.Data2['temp'].ix[index] if 'temp' in self.Data2 else None pres = self.Data2['pres'].ix[index] if 'pres' in self.Data2 else None for Sim in self.Simulations[index]: + Sim.gradient = AGrad simd = os.path.join(os.getcwd(), index, Sim.name) GoInto(simd) # Submit or run the simulation if the result file does not exist. if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): - # Write to disk: Force field object, current parameter values, target options + # Write coordinate file in the current location. M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] - M.write(Sim.coords) - # # Get relevant files from the target folder, I suppose. - # link_dir_contents(os.path.join(self.root,self.rundir),os.getcwd()) - # # Determine initial coordinates. - # self.last_traj += [os.path.join(os.getcwd(), i) for i in self.extra_output] - # self.liquid_mol[simnum%len(self.liquid_mol)].write(self.liquid_coords, ftype='tinker' if self.engname == 'tinker' else None) - # Command for running the simulation. 
- ## Copy run scripts from ForceBalance installation directory - # We can build the entire MD options dictionary here!! - # Update dictionary with simulation options. - # OptionDict = copy.deepcopy(self.OptionDict) - # OptionDict['gradient'] = AGrad - # Sim.gradient = AGrad - # Sim.nequil = self.nequil - # Sim.nsteps = self.nsteps - # Sim.timestep = self.timestep - # Sim.sample = self.sample - # Sim.h = - # Sim.pgrad = - # OptionDict['coords'] = "%s%s" % (Sim.type, self.crdsfx[0]) - # OptionDict.update(vars(Sim)) - # OptionDict['simtype'] = Sim.type - # # In the future we should have these settings - # OptionDict['nequil'] = self.nequil - # OptionDict['nsteps'] = self.nsteps - # OptionDict['timestep'] = self.timestep - # OptionDict['sample'] = self.sample - # OptionDict['minimize'] = self.minimize - # printcool_dictionary(vars(Sim)) - # SimOpts = dict(vars(Sim)) - Opts = vars(Sim) - Opts['gradient'] = AGrad - Opts['pgrad'] = self.pgrad - - with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Opts),f) + M.write(Sim.EngOpts['coords']) + # Copy auxiliary files to the current location. + for i, j in Sim.faux.values(): + shutil.copy2(i, j) + # Write to disk: Force field object, current parameter values, target options + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Sim),f) + # Copy scripts to the current location. for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(os.getcwd(), f)) + # Put together the command. cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.type)] - #cmdlist.append('-eq %i -md %i -dt %g -sp %g' % (self.nequil, self.nsteps, self.timestep, self.sample)) if temp != None: cmdlist.append('-T %g' % float(temp)) if pres != None: @@ -1106,3 +1090,127 @@ def __str__(self): return "\n".join(msg) +class Simulation(object): + + """ + Data container for a MD simulation (specified by index, simulation + type, initial condition). These settings are written to a file + then passed to md_one.py. 
+
+    The Simulation object is passed between the master ForceBalance
+    process and the remote script (e.g. md_one.py).
+    """
+
+    type_settings = {'gas': {'pbc' : 0},
+                     'liquid': {'pbc' : 1},
+                     'solid': {'pbc' : 1, 'anisotropic_box' : 1},
+                     'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}}
+
+    def __init__(self, tgt, data, name, index, stype, icn, tsnames):
+
+        # The name of the simulation (refers to a directory under job.tmp/target/iter_x/index/name)
+        self.name = name
+        # The Index that the simulation belongs to.
+        self.index = index
+        # The type of simulation (liquid, gas, solid, bilayer...)
+        if stype not in Simulation.type_settings.keys():
+            logger.error('Simulation type %s is not supported at this time' % stype)
+            raise RuntimeError
+        # The reference data! May contain parameters for calculating observables.
+        self.Data = copy.deepcopy(data)
+        # Type of the simulation (map to simulation settings)
+        self.type = stype
+        # Locate the initial coordinate file and frame number.
+        self.initial, self.iframe = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, tgt.crdsfx, True, icn)
+        # The time series for the simulation.
+        self.timeseries = OrderedDict([(i, []) for i in tsnames])
+        # The file extension that the coordinate file will be written with.
+        self.fext = os.path.splitext(self.initial)[1]
+        # Auxiliary files to be copied to the current location prior to running the simulation.
+        self.faux = OrderedDict()
+        for sfx in tgt.auxsfx:
+            auxf = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, sfx, False)[0]
+            self.faux[os.path.splitext(auxf)[1]] = (auxf, "%s%s" % (self.type, os.path.splitext(auxf)[1]))
+        # Name of the simulation engine
+        self.engname = tgt.engname
+        # Whether to use the CUDA platform (OpenMM only).
+        self.force_cuda = tgt.OptionDict.get('force_cuda', 0)
+        # Finite difference step size.
+        self.h = tgt.h
+        # Active parameters to differentiate over.
+        self.pgrad = tgt.pgrad
+
+        pbc = Simulation.type_settings[self.type]['pbc']
+
+        #----
+        # MD options, passed straight to the molecular_dynamics() method
+        #----
+        self.MDOpts = OrderedDict()
+        # The time step in femtoseconds.
+        self.MDOpts['timestep'] = tgt.timestep
+        # The number of equilibration MD steps.
+        self.MDOpts['nequil'] = tgt.eq_steps
+        # The number of production MD steps.
+        self.MDOpts['nsteps'] = tgt.md_steps
+        # The number of MD steps between sampling.
+        self.MDOpts['nsave'] = int(1000 * tgt.interval / self.MDOpts['timestep'])
+        # Flag for minimizing the energy.
+        self.MDOpts['minimize'] = tgt.OptionDict.get('minimize_energy', 0)
+        # The number of threads for this simulation (no-PBC simulations are 1 thread).
+        self.MDOpts['threads'] = tgt.OptionDict.get('md_threads', 1) if pbc else 1
+        # Whether to use multiple timestep integrator.
+        self.MDOpts['mts'] = tgt.OptionDict.get('mts_integrator', 0)
+        # The number of beads in an RPMD simulation.
+        self.MDOpts['rpmd_beads'] = tgt.OptionDict.get('rpmd_beads', 0)
+        # Print out lots of information.
+        self.MDOpts['verbose'] = True
+        # Save trajectory to disk.
+        self.MDOpts['save_traj'] = tgt.save_traj
+        # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only).
+        self.MDOpts['nbarostat'] = tgt.OptionDict.get('n_mcbarostat', 25)
+        # Flag for anisotropic simulation cell (OpenMM only).
+        self.MDOpts['anisotropic'] = tgt.OptionDict.get('anisotropic_box', 0)
+        # The time step for the 'fast forces' in femtoseconds in MTS integrators.
+        self.MDOpts['faststep'] = tgt.OptionDict.get('faststep', 0.25)
+        # Simulation temperature in Kelvin.
+        self.MDOpts['temperature'] = getval(self.Data, 'temp') if 'temp' in self.Data else None
+        # Simulation pressure in bar.
+        self.MDOpts['pressure'] = getval(self.Data, 'pres') if 'pres' in self.Data else None
+
+        #----
+        # Engine options, used in creating the Engine object
+        #----
+        self.EngOpts = OrderedDict()
+        # Whether to use periodic boundary conditions.
+ self.EngOpts['pbc'] = pbc + # The name of the coordinate file to be written prior to running the simulation. + self.EngOpts['coords'] = "%s%s" % (self.type, self.fext) + # Software-specific options. + if self.engname == 'openmm': + self.EngOpts['platname'] = 'CUDA' if self.EngOpts['pbc'] else 'Reference' + else: + if self.force_cuda: + logger.error("force_cuda option is set, but has no effect on Gromacs engine.") ; raise RuntimeError + if self.MDOpts['rpmd_beads'] > 0: + logger.error('Only the OpenMM engine can handle RPMD simulations.') ; raise RuntimeError + if self.MDOpts['mts']: + logger.error('Only OpenMM is configured to use multiple timestep integrator.') ; raise RuntimeError + if self.MDOpts['anisotropic']: + logger.error('Only OpenMM is configured to use anisotropic pressure coupling.') ; raise RuntimeError + + if self.engname == 'gromacs': + self.EngOpts['gmxpath'] = tgt.gmxpath + self.EngOpts['gmxsuffix'] = tgt.gmxsuffix + self.EngOpts['gmx_top'] = self.faux['.top'][1] + self.EngOpts['gmx_mdp'] = self.faux['.mdp'][1] + + if self.engname == 'tinker': + self.EngOpts['tinkerpath'] = tgt.tinkerpath + self.EngOpts['tinker_key'] = self.faux['.key'][1] + + def __str__(self): + msg = [] + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) diff --git a/studies/004_thermo/targets/LiquidBromine/1/gas.mdp b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp index 1a64558b6..2a7427065 100644 --- a/studies/004_thermo/targets/LiquidBromine/1/gas.mdp +++ b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp @@ -11,16 +11,16 @@ nstxtcout = 50 xtc_grps = System energygrps = System -nstlist = 10 -ns_type = grid -rlist = 0.9 +nstlist = 0 +ns_type = simple +rlist = 0.0 vdwtype = cut-off coulombtype = cut-off -rcoulomb = 0.9 -rvdw = 0.9 -rvdw_switch = 0.9 
+rcoulomb = 0.0 +rvdw = 0.0 +rvdw_switch = 0.0 constraints = all-bonds -pbc = xyz +pbc = no tcoupl = v-rescale tc_grps = System From c256ce09d8270b87c55b566150a70a53e959ed1e Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 22 Apr 2014 22:38:05 -0700 Subject: [PATCH 20/25] md_one.py runs molecular dynamics! --- src/data/md_one.py | 14 ++++++++++---- src/gmxio.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index ff1763179..46b8b0bcf 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -21,6 +21,7 @@ import numpy as np import importlib as il +from forcebalance.nifty import click from forcebalance.nifty import lp_dump, lp_load, wopen from forcebalance.nifty import printcool, printcool_dictionary from forcebalance.molecule import Molecule @@ -89,13 +90,13 @@ traceback.print_exc() raise Exception("Cannot import OpenMM modules") from forcebalance.openmmio import * - Engine = OpenMM + EngineClass = OpenMM elif engname == "gromacs" or engname == "gmx": from forcebalance.gmxio import * - Engine = GMX + EngineClass = GMX elif engname == "tinker": from forcebalance.tinkerio import * - Engine = TINKER + EngineClass = TINKER else: raise Exception('OpenMM, GROMACS, and TINKER are supported at this time.') @@ -147,7 +148,12 @@ def main(): pgrad = Sim.pgrad # Create instances of the MD Engine objects. - MDEngine = Engine(name=Sim.type, **Sim.EngOpts) + Engine = EngineClass(name=Sim.type, **Sim.EngOpts) + + click() # Start timer. + # This line runs the condensed phase simulation. 
+ prop_return = Engine.molecular_dynamics(**Sim.MDOpts) + logger.info("MD simulation took %.3f seconds\n" % click()) sys.exit() diff --git a/src/gmxio.py b/src/gmxio.py index b43dc205c..12d9159b3 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1120,13 +1120,18 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, if temperature != None: md_opts["ref_t"] = temperature md_opts["gen_vel"] = "no" + # Set some default methods for temperature coupling. md_defs["tc_grps"] = "System" md_defs["tcoupl"] = "v-rescale" md_defs["tau_t"] = 1.0 if self.pbc: md_opts["comm_mode"] = "linear" + # Removing center of mass motion at every time step should not impact performance. + # http://gromacs.5086.x6.nabble.com/COM-motion-removal-td4413458.html + md_opts["nstcomm"] = 1 if pressure != None: md_opts["ref_p"] = pressure + # Set some default methods for pressure coupling. md_defs["pcoupl"] = "parrinello-rahman" md_defs["tau_p"] = 1.5 else: @@ -1172,6 +1177,16 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, self.warngmx("grompp -c %s -p %s.top -f %s-md.mdp -o %s-md.tpr" % (gro2, self.name, self.name, self.name), warnings=warnings, print_command=verbose) self.callgmx("mdrun -v -deffnm %s-md -nt %i -stepout %i" % (self.name, threads, nsave), print_command=verbose, print_to_screen=verbose) + if verbose: logger.info("Finished!\n") + + # Final frame of molecular dynamics. 
+ self.md_final = "%s-md.gro" % self.name + + if 1: return + + #---- + # Below + #---- self.mdtraj = '%s-md.trr' % self.name if verbose: logger.info("Production run finished, calculating properties...\n") From a8ff3bb5b5826f8ecf1aa0de994683a6e2fa6413 Mon Sep 17 00:00:00 2001 From: leeping Date: Wed, 23 Apr 2014 14:55:30 -0700 Subject: [PATCH 21/25] Work on extracting timeseries --- src/data/md_one.py | 15 +++- src/gmxio.py | 201 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 170 insertions(+), 46 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index 46b8b0bcf..70f6256da 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -152,8 +152,19 @@ def main(): click() # Start timer. # This line runs the condensed phase simulation. - prop_return = Engine.molecular_dynamics(**Sim.MDOpts) + Engine.molecular_dynamics(**Sim.MDOpts) logger.info("MD simulation took %.3f seconds\n" % click()) + + # Extract properties. + Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) + + # Dump results to file + logger.info("Writing final force field.\n") + pvals = FF.make(mvals) + + logger.info("Writing all simulation data to disk.\n") + with wopen('md_result.p') as f: + lp_dump(Results, f) sys.exit() @@ -212,6 +223,8 @@ def main(): # if Fopts['threads'] > 1: printcool_dictionary(EngOpts, title="Engine options") + + # Number of threads, multiple timestep integrator, anisotropic box etc. # threads = Fopts.get('md_threads', 1) # mts = Fopts.get('mts_integrator', 0) diff --git a/src/gmxio.py b/src/gmxio.py index 12d9159b3..181ad4557 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -8,6 +8,7 @@ import os, sys import re +import pandas as pd from forcebalance.nifty import * from forcebalance.nifty import _exec from forcebalance import BaseReader @@ -1182,28 +1183,25 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, # Final frame of molecular dynamics. 
self.md_final = "%s-md.gro" % self.name - if 1: return - - #---- - # Below - #---- + # Name of the molecular dynamics trajectory. self.mdtraj = '%s-md.trr' % self.name - if verbose: logger.info("Production run finished, calculating properties...\n") - # Figure out dipoles - note we use g_dipoles and not the multipole_moments function. - self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % (self.name, self.name, self.name), stdin="System\n") - - # Figure out which energy terms need to be printed. - energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) - ekeep = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] - ekeep += ['Temperature', 'Volume', 'Density'] - - # Calculate deuterium order parameter for bilayer optimization. + # Call md_extract and return the prop_return dictionary (backward compatibility with old functionality.) + Extract = self.md_extract(OrderedDict([(i, 0) for i in ['potential', 'kinetic', 'dipole', 'components']])) + prop_return = {'Potentials': Extract['potential'], + 'Kinetics': Extract['kinetic'], + 'Dips': Extract['dipole'], + 'Ecomps': Extract['components']} + if pbc: + Extract1 = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) + prop_return['Rhos'] = Extract['density'] + prop_return['Volumes'] = Extract['volume'] if bilayer: n_snap = self.n_snaps(nsteps, 1000, timestep) Scds = self.calc_scd(n_snap, timestep) al_vars = ['Box-Y', 'Box-X'] - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(al_vars)) + self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % + (self.name, self.name), stdin="\n".join(al_vars)) Xs = [] Ys = [] for line in open("%s-md-energy-xy.xvg" % self.name): @@ -1213,38 +1211,150 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, Xs = np.array(Xs) Ys = np.array(Ys) Als = (Xs * Ys) / 64 - else: - Scds = 0 - Als = 0 + prop_return['Scds'] = 
Scds + prop_return['Als'] = Als - # Perform energy component analysis and return properties. - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) - ecomp = OrderedDict() - Rhos = [] - Volumes = [] - Kinetics = [] - Potentials = [] - for line in open("%s-md-energy.xvg" % self.name): - s = [float(i) for i in line.split()] - for i in range(len(ekeep) - 2): - val = s[i+1] - if ekeep[i] in ecomp: - ecomp[ekeep[i]].append(val) - else: - ecomp[ekeep[i]] = [val] - Rhos.append(s[-1]) - Volumes.append(s[-2]) - Rhos = np.array(Rhos) - Volumes = np.array(Volumes) - Potentials = np.array(ecomp['Potential']) - Kinetics = np.array(ecomp['Kinetic-En.']) - Dips = np.array([[float(i) for i in line.split()[1:4]] for line in open("%s-md-dip.xvg" % self.name)]) - Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) - # Initialized property dictionary. - prop_return = OrderedDict() - prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) if verbose: logger.info("Finished!\n") return prop_return + # ecomp = OrderedDict() + # for line in open("%s-md-energy.xvg" % self.name): + # s = [float(i) for i in line.split()][1:] + # for i, j in enumerate(ekeep): + # val = s[i] + # if j in ecomp: + # ecomp[j].append(s[i]) + # else: + # ecomp[j] = [s[i]] + # print ecomp.keys() + # Rhos.append(s[-1]) + # Volumes.append(s[-2]) + # Calculate deuterium order parameter for bilayer optimization. + # # Perform energy component analysis and return properties. 
+ # self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) + # ecomp = OrderedDict() + # Rhos = [] + # Volumes = [] + # Kinetics = [] + # Potentials = [] + # for line in open("%s-md-energy.xvg" % self.name): + # s = [float(i) for i in line.split()] + # for i in range(len(ekeep) - 2): + # val = s[i+1] + # if ekeep[i] in ecomp: + # ecomp[ekeep[i]].append(val) + # else: + # ecomp[ekeep[i]] = [val] + # Rhos.append(s[-1]) + # Volumes.append(s[-2]) + # Rhos = np.array(Rhos) + # Volumes = np.array(Volumes) + # Potentials = np.array(ecomp['Potential']) + # Kinetics = np.array(ecomp['Kinetic-En.']) + # Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) + # # Initialized property dictionary. + # prop_return = OrderedDict() + # prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) + # if verbose: logger.info("Finished!\n") + # return prop_return + + def md_extract(self, tsspec, verbose=True): + """ + Extract time series from the MD trajectory / energy file. + Since Gromacs can do so many things in a single call to + g_energy, we implement all the functionality in a single big + function (it can be split off later.) + + @param[in] tsspec Dictionary with tsnames : tsparams key/value + pairs. tsparams contains any extra information needed to + calculate the observable (e.g. atom indices in S_cd) but it + may also be None. + + @return answer Dictionary with tsnames : timeseries key/value pairs. + The leading dimension of the time series is always the sample axis. + """ + + if not hasattr(self, 'mdtraj') or not os.path.exists(self.mdtraj): + logger.error('Called the md_extract method without having an MD trajectory!') + raise RuntimeError + + if verbose: logger.info("Calculating properties...\n") + + Output = OrderedDict() + + # Figure out which energy terms need to be printed. 
+ energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) + """ + For reference the menu from g_energy may look like this. + + Select the terms you want from the following list by + selecting either (part of) the name or the number or a combination. + End your selection with an empty line or a zero. + ------------------------------------------------------------------- + 1 LJ-(SR) 2 Disper.-corr. 3 Coulomb-(SR) 4 Potential + 5 Kinetic-En. 6 Total-Energy 7 Temperature 8 Pres.-DC + 9 Pressure 10 Constr.-rmsd 11 Box-X 12 Box-Y + 13 Box-Z 14 Volume 15 Density 16 pV + 17 Enthalpy 18 Vir-XX 19 Vir-XY 20 Vir-XZ + 21 Vir-YX 22 Vir-YY 23 Vir-YZ 24 Vir-ZX + 25 Vir-ZY 26 Vir-ZZ 27 Pres-XX 28 Pres-XY + 29 Pres-XZ 30 Pres-YX 31 Pres-YY 32 Pres-YZ + 33 Pres-ZX 34 Pres-ZY 35 Pres-ZZ 36 #Surf*SurfTen + 37 Box-Vel-XX 38 Box-Vel-YY 39 Box-Vel-ZZ 40 T-System + 41 Lamb-System + """ + + # Term names that we want to get from g_energy. + ekeep = [] + # Save anything that comes before Total-Energy if doing an energy component analysis. + if 'components' in tsspec: + ecomp = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] + ekeep += ecomp[:] + # These are time series which can be directly copied from g_energy output. + copy_keys = {'energy' : 'Total-Energy', 'potential' : 'Potential', 'kinetic' : 'Kinetic-En.', + 'temperature' : 'Temperature', 'pressure' : 'Pressure', 'volume' : 'Volume', + 'density' : 'Density', 'pv' : 'pV'} + for i in copy_keys: + if i in tsspec and copy_keys[i] not in ekeep: + ekeep.append(copy_keys[i]) + # Area per lipid requires Box-X and Box-Y time series. + if 'al' in tsspec: + ekeep += ['Box-X', 'Box-Y'] + ekeep = list(set(ekeep)) + eksort = [] + for i in energyterms.keys(): + for j in ekeep: + if j not in energyterms.keys(): + logger.error('Energy term in ekeep %s is not present in edr file' % j) + raise RuntimeError + if i == j: eksort.append(j) + + # Perform energy component analysis and return properties. 
+ self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) + + DF = pd.DataFrame([[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)], columns=eksort, + index = pd.Index([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)], name='time')) + # Now take the output values from g_energy and allocate them into the Output dictionary. + for i in tsspec: + if i in copy_keys: + Output[i] = np.array(DF[copy_keys[i]]) + if 'components' in tsspec: + for i in ecomp: + Output[i] = np.array(DF[copy_keys[i]]) + # Area per lipid. + # HARD CODED NUMBER: number of lipid molecules! + if 'al' in tsspec: + Output['al'] = np.array(DF['Box-X'])*np.array(DF['Box-Y']) / 64 + + # Dipole moments; note we use g_dipoles and not the multipole_moments function. + if 'dipole' in tsspec: + self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % + (self.name, self.name, self.name), stdin="System\n") + Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] + for line in open("%s-md-dip.xvg" % self.name)]) + + printcool_dictionary(Output, title = 'Output') + return Output + def md(self, nsteps=0, nequil=0, verbose=False, deffnm=None, **kwargs): @@ -1520,3 +1630,4 @@ def __init__(self,options,tgt_opts,forcefield): self.scripts = ['gmxprefix.bash', 'md_one.py'] ## Initialize base class. 
super(Thermo_GMX,self).__init__(options,tgt_opts,forcefield) + From 537ed48c0125880648c048e0114e08e748100fbd Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 23 Apr 2014 16:16:16 -0700 Subject: [PATCH 22/25] Clean up --- src/gmxio.py | 73 ++++++++++------------------------------------------ 1 file changed, 14 insertions(+), 59 deletions(-) diff --git a/src/gmxio.py b/src/gmxio.py index 181ad4557..b48d1799a 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1193,69 +1193,16 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, 'Dips': Extract['dipole'], 'Ecomps': Extract['components']} if pbc: - Extract1 = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) - prop_return['Rhos'] = Extract['density'] - prop_return['Volumes'] = Extract['volume'] + Extract_ = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) + prop_return['Rhos'] = Extract_['density'] + prop_return['Volumes'] = Extract_['volume'] if bilayer: - n_snap = self.n_snaps(nsteps, 1000, timestep) - Scds = self.calc_scd(n_snap, timestep) - al_vars = ['Box-Y', 'Box-X'] - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % - (self.name, self.name), stdin="\n".join(al_vars)) - Xs = [] - Ys = [] - for line in open("%s-md-energy-xy.xvg" % self.name): - s = [float(i) for i in line.split()] - Xs.append(s[-1]) - Ys.append(s[-2]) - Xs = np.array(Xs) - Ys = np.array(Ys) - Als = (Xs * Ys) / 64 - prop_return['Scds'] = Scds - prop_return['Als'] = Als + Extract__ = self.md_extract(OrderedDict([(i, 0) for i in ['al', 'scd']])) + prop_return['Als'] = Extract__['al'] + prop_return['Scds'] = Extract__['scd'] if verbose: logger.info("Finished!\n") return prop_return - # ecomp = OrderedDict() - # for line in open("%s-md-energy.xvg" % self.name): - # s = [float(i) for i in line.split()][1:] - # for i, j in enumerate(ekeep): - # val = s[i] - # if j in ecomp: - # ecomp[j].append(s[i]) - # else: - # ecomp[j] = [s[i]] - # print ecomp.keys() - # 
Rhos.append(s[-1]) - # Volumes.append(s[-2]) - # Calculate deuterium order parameter for bilayer optimization. - # # Perform energy component analysis and return properties. - # self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) - # ecomp = OrderedDict() - # Rhos = [] - # Volumes = [] - # Kinetics = [] - # Potentials = [] - # for line in open("%s-md-energy.xvg" % self.name): - # s = [float(i) for i in line.split()] - # for i in range(len(ekeep) - 2): - # val = s[i+1] - # if ekeep[i] in ecomp: - # ecomp[ekeep[i]].append(val) - # else: - # ecomp[ekeep[i]] = [val] - # Rhos.append(s[-1]) - # Volumes.append(s[-2]) - # Rhos = np.array(Rhos) - # Volumes = np.array(Volumes) - # Potentials = np.array(ecomp['Potential']) - # Kinetics = np.array(ecomp['Kinetic-En.']) - # Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) - # # Initialized property dictionary. - # prop_return = OrderedDict() - # prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) - # if verbose: logger.info("Finished!\n") - # return prop_return def md_extract(self, tsspec, verbose=True): """ @@ -1340,11 +1287,19 @@ def md_extract(self, tsspec, verbose=True): if 'components' in tsspec: for i in ecomp: Output[i] = np.array(DF[copy_keys[i]]) + # Area per lipid. # HARD CODED NUMBER: number of lipid molecules! if 'al' in tsspec: Output['al'] = np.array(DF['Box-X'])*np.array(DF['Box-Y']) / 64 + # Deuterium order parameter. + # HARD CODED: atom names of lipid tails! + if 'scd' in tsspec: + n_snap = self.n_snaps(nsteps, 1000, timestep) + Scds = self.calc_scd(n_snap, timestep) + Output['scd'] = Scds + # Dipole moments; note we use g_dipoles and not the multipole_moments function. 
if 'dipole' in tsspec: self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % From 3445751a09dc5ef6ba51c14d48e2ee751ce15fcd Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 23 Apr 2014 16:41:43 -0700 Subject: [PATCH 23/25] Clean up --- src/data/md_one.py | 237 +++------------------------------------------ src/gmxio.py | 15 ++- src/thermo.py | 2 +- 3 files changed, 22 insertions(+), 232 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index 70f6256da..a1a4bfc7e 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -38,8 +38,6 @@ # Note: Only the simulation settings that vary across different # simulations in a target may be specified on the command line. parser = argparse.ArgumentParser() -parser.add_argument('simulation', type=str, - help='The simulation name (important; used in setting up)') parser.add_argument('-T', '--temp', '--temperature', dest='temperature', type=float, help='Simulation temperature, leave blank for constant energy') parser.add_argument('-P', '--pres', '--pressure', dest='pressure', type=float, @@ -104,19 +102,19 @@ def main(): """Usage: - (prefix.sh) md_one.py - -T, --temperature - -P, --pressure - -g, --grad (if gradients of output timeseries are desired) - -o, --outputs - -eq, --nequil - -md, --nsteps - -dt, --timestep - -nt, --interval + (prefix.sh) md_one.py -T, --temperature + -P, --pressure + -g, --grad (if gradients of output timeseries are desired) + -eq, --nequil + -md, --nsteps + -dt, --timestep + -sp, --sample + -nt, --threads + -min, --minimize This program is meant to be called automatically by ForceBalance - because most options are loaded from the 'forcebalance.p' input - file. + because most options are loaded from the 'forcebalance.p' + simulation file. """ @@ -146,233 +144,20 @@ def main(): h = Sim.h # Active parameters to differentiate pgrad = Sim.pgrad - # Create instances of the MD Engine objects. 
Engine = EngineClass(name=Sim.type, **Sim.EngOpts) - click() # Start timer. # This line runs the condensed phase simulation. Engine.molecular_dynamics(**Sim.MDOpts) logger.info("MD simulation took %.3f seconds\n" % click()) - # Extract properties. Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) - # Dump results to file logger.info("Writing final force field.\n") pvals = FF.make(mvals) - logger.info("Writing all simulation data to disk.\n") with wopen('md_result.p') as f: lp_dump(Results, f) - - sys.exit() - - # # Get the temperature. - # temperature = args.get('temperature', Fopts.get('temperature', None)) - # # Get the pressure. - # pressure = args.get('pressure', Fopts.get('pressure', None)) - # # - # nequil = args.get('nequil', Fopts.get('nequil')) - - #---- - # load some options from file - #---- - # Finite difference step size - h = Fopts['h'] - # Active parameters for taking the gradient - pgrad = Fopts['pgrad'] - # Name of the initial coordinate file - coords = Fopts['coords'] - # Base name of the initial coordinate file - cbase = os.path.splitext(coords)[0] - # Actually start to do stuff. 
- # Molecule object corresponding to - M = Molecule(coords) - - #---- - # Engine options - #---- - EngOpts = OrderedDict([("coords", coords), ("pbc", Fopts['pbc'])]) - if engname == "openmm": - if pbc: - EngOpts["platname"] = 'CUDA' - else: - EngOpts["platname"] = 'Reference' - # Force crash if asking for the CUDA platform and force_cuda option is on - # (because we don't want to inadvertently run using Reference platform) - if EngOpts["platname"] == 'CUDA' and Fopts['force_cuda']: - try: Platform.getPlatformByName('CUDA') - except: raise RuntimeError('Forcing failure because CUDA platform unavailable') - if threads > 1: - logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") - elif engname == "gromacs": - # Gromacs-specific options - EngOpts["gmxpath"] = Fopts["gmxpath"] - EngOpts["gmxsuffix"] = Fopts["gmxsuffix"] - EngOpts["gmx_top"] = Fopts["gmx_top"] - EngOpts["gmx_mdp"] = Fopts["gmx_mdp"] - if Fopts['force_cuda']: logger.warn("force_cuda option has no effect on Gromacs engine.") - if Fopts['rpmd_beads'] > 0: raise RuntimeError("Gromacs cannot handle RPMD.") - if Fopts['mts']: logger.warn("Gromacs not configured for multiple timestep integrator.") - if Fopts['anisotropic']: logger.warn("Gromacs not configured for anisotropic box scaling.") - elif engname == "tinker": - EngOpts["tinkerpath"] = Fopts["tinkerpath"] - EngOpts["tinker_key"] = Fopts["tinker_key"] - - # if Fopts['threads'] > 1: - printcool_dictionary(EngOpts, title="Engine options") - - - - # Number of threads, multiple timestep integrator, anisotropic box etc. 
- # threads = Fopts.get('md_threads', 1) - # mts = Fopts.get('mts_integrator', 0) - # rpmd_beads = Fopts.get('rpmd_beads', 0) - # force_cuda = Fopts.get('force_cuda', 0) - # nbarostat = Fopts.get('n_mcbarostat', 25) - # anisotropic = Fopts.get('anisotropic_box', 0) - # minimize = Fopts.get('minimize_energy', 1) - sys.exit() - - #---- - # Setting up MD simulations - #---- - - - EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)]) - GenOpts = OrderedDict([('FF', FF)]) - if engname == "openmm": - # OpenMM-specific options - EngOpts["liquid"]["platname"] = 'CUDA' - EngOpts["gas"]["platname"] = 'Reference' - if force_cuda: - try: Platform.getPlatformByName('CUDA') - except: raise RuntimeError('Forcing failure because CUDA platform unavailable') - if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n") - elif engname == "gromacs": - # Gromacs-specific options - GenOpts["gmxpath"] = Fopts["gmxpath"] - GenOpts["gmxsuffix"] = Fopts["gmxsuffix"] - EngOpts["liquid"]["gmx_top"] = os.path.splitext(liquid_fnm)[0] + ".top" - EngOpts["liquid"]["gmx_mdp"] = os.path.splitext(liquid_fnm)[0] + ".mdp" - EngOpts["gas"]["gmx_top"] = os.path.splitext(gas_fnm)[0] + ".top" - EngOpts["gas"]["gmx_mdp"] = os.path.splitext(gas_fnm)[0] + ".mdp" - if force_cuda: logger.warn("force_cuda option has no effect on Gromacs engine.") - if rpmd_beads > 0: raise RuntimeError("Gromacs cannot handle RPMD.") - if mts: logger.warn("Gromacs not configured for multiple timestep integrator.") - if anisotropic: logger.warn("Gromacs not configured for anisotropic box scaling.") - elif engname == "tinker": - # Tinker-specific options - GenOpts["tinkerpath"] = Fopts["tinkerpath"] - EngOpts["liquid"]["tinker_key"] = os.path.splitext(liquid_fnm)[0] + ".key" - EngOpts["gas"]["tinker_key"] = os.path.splitext(gas_fnm)[0] + ".key" - if force_cuda: logger.warn("force_cuda option has no effect on Tinker engine.") - if rpmd_beads > 0: raise 
RuntimeError("TINKER cannot handle RPMD.") - if mts: logger.warn("Tinker not configured for multiple timestep integrator.") - EngOpts["liquid"].update(GenOpts) - EngOpts["gas"].update(GenOpts) - for i in EngOpts: - printcool_dictionary(EngOpts[i], "Engine options for %s" % i) - - # Set up MD options - # These are used in the function call to molecular_dynamics() - - MDOpts["liquid"] = OrderedDict([("nsteps", liquid_nsteps), ("timestep", liquid_timestep), - ("temperature", temperature), ("pressure", pressure), - ("nequil", liquid_nequil), ("minimize", minimize), - ("nsave", int(1000 * liquid_intvl / liquid_timestep)), - ("verbose", True), ('save_traj', Fopts['save_traj']), - ("threads", threads), ("anisotropic", anisotropic), ("nbarostat", nbarostat), - ("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) - MDOpts["gas"] = OrderedDict([("nsteps", gas_nsteps), ("timestep", gas_timestep), - ("temperature", temperature), ("nsave", int(1000 * gas_intvl / gas_timestep)), - ("nequil", gas_nequil), ("minimize", minimize), ("threads", 1), ("mts", mts), - ("rpmd_beads", rpmd_beads), ("faststep", faststep)]) - - - engines = [] - ## Setup and carry out simulations in chain - for i in range(args.length): - # Simulation files - if engname == "gromacs": - ndx_flag = False - coords = args.name + str(i+1) + ".gro" - top_file = args.name + str(i+1) + ".top" - mdp_file = args.name + str(i+1) + ".mdp" - ndx_file = args.name + str(i+1) + ".ndx" - if os.path.exists(ndx_file): - ndx_flag = True - - mol = Molecule(coords) - #---- - # Set coordinates and molecule for engine - #---- - EngOpts = OrderedDict([("FF", FF), - ("pbc", True), - ("coords", coords), - ("mol", mol)]) - - if engname == "gromacs": - # Gromacs-specific options - EngOpts["gmx_top"] = top_file - EngOpts["gmx_mdp"] = mdp_file - if ndx_flag: - EngOpts["gmx_ndx"] = ndx_file - - printcool_dictionary(EngOpts) - - # Create engine objects and store them for subsequent analysis. 
- s = Engine(name=args.name+str(i+1), **EngOpts) - - #=====================# - # Run the simulation. # - #=====================# - MDOpts = OrderedDict([("nsteps", args.nsteps), - ("nequil", args.nequil)]) - - printcool("Molecular dynamics simulation", color=4, bold=True) - s.md(verbose=True, **MDOpts) - - engines.append(s) - - #======================================================================# - # Extract the quantities of interest from the MD simulations and dump # - # the results to file. # - # =====================================================================# - results = OrderedDict() - for q in args.quantities: - logger.info("Extracting %s...\n" % q) - - # Initialize quantity - objstr = "Quantity_" + q.capitalize() - dm = il.import_module('..quantity', - package='forcebalance.quantity') - - Quantity = getattr(dm, objstr)(engname, args.temperature, args.pressure) - - Q, Qerr, Qgrad = Quantity.extract(engines, FF, mvals, h, pgrad, AGrad) - - results.setdefault("values", []).append(Q) - results.setdefault("errors", []).append(Qerr) - results.setdefault("grads", []).append(Qgrad) - - logger.info("Finished!\n") - - # Print out results for the quantity and its derivative. 
- Sep = printcool(("%s: % .4f +- % .4f \nAnalytic Derivative:" - % (q.capitalize(), Q, Qerr))) - FF.print_map(vals=Qgrad) - - # Dump results to file - logger.info("Writing final force field.\n") - pvals = FF.make(mvals) - - logger.info("Writing all simulation data to disk.\n") - with wopen('md_result.p') as f: - lp_dump((np.asarray(results["values"]), - np.asarray(results["errors"]), - np.asarray(results["grads"])), f) if __name__ == "__main__": main() diff --git a/src/gmxio.py b/src/gmxio.py index b48d1799a..d6a9d1732 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1192,7 +1192,7 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, 'Kinetics': Extract['kinetic'], 'Dips': Extract['dipole'], 'Ecomps': Extract['components']} - if pbc: + if self.pbc: Extract_ = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) prop_return['Rhos'] = Extract_['density'] prop_return['Volumes'] = Extract_['volume'] @@ -1226,8 +1226,6 @@ def md_extract(self, tsspec, verbose=True): if verbose: logger.info("Calculating properties...\n") - Output = OrderedDict() - # Figure out which energy terms need to be printed. energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) """ @@ -1278,15 +1276,23 @@ def md_extract(self, tsspec, verbose=True): # Perform energy component analysis and return properties. self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) + DF = pd.DataFrame([[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)], columns=eksort, index = pd.Index([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)], name='time')) + + + # Okay, I'm not completely pandas-crazy yet. + Output = OrderedDict() + # Now take the output values from g_energy and allocate them into the Output dictionary. 
for i in tsspec: if i in copy_keys: Output[i] = np.array(DF[copy_keys[i]]) if 'components' in tsspec: + Components = OrderedDict() for i in ecomp: - Output[i] = np.array(DF[copy_keys[i]]) + Components[i] = np.array(DF[i]) + Output['components'] = Components # Area per lipid. # HARD CODED NUMBER: number of lipid molecules! @@ -1307,7 +1313,6 @@ def md_extract(self, tsspec, verbose=True): Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] for line in open("%s-md-dip.xvg" % self.name)]) - printcool_dictionary(Output, title = 'Output') return Output diff --git a/src/thermo.py b/src/thermo.py index 56cdf7dd5..4447d81d7 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -792,7 +792,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(os.getcwd(), f)) # Put together the command. - cmdlist = ['%s python md_one.py %s' % (self.mdpfx, Sim.type)] + cmdlist = ['%s python md_one.py' % (self.mdpfx)] if temp != None: cmdlist.append('-T %g' % float(temp)) if pres != None: From 63cae2f168a790345f54b6e9b4bf8a48ac02dc72 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 23 Apr 2014 23:17:54 -0700 Subject: [PATCH 24/25] Density observable can calculate the density and gradient --- src/data/md_one.py | 4 ++ src/gmxio.py | 74 +++++++++++------------ src/nifty.py | 8 +++ src/observable.py | 101 ++++++++++++++------------------ src/thermo.py | 143 +++++++++++++++++++++++++++++++-------------- 5 files changed, 187 insertions(+), 143 deletions(-) diff --git a/src/data/md_one.py b/src/data/md_one.py index a1a4bfc7e..e71f9c3bc 100644 --- a/src/data/md_one.py +++ b/src/data/md_one.py @@ -25,6 +25,7 @@ from forcebalance.nifty import lp_dump, lp_load, wopen from forcebalance.nifty import printcool, printcool_dictionary from forcebalance.molecule import Molecule +from forcebalance.thermo import energy_derivatives from collections import OrderedDict @@ -152,6 +153,9 @@ def main(): logger.info("MD 
simulation took %.3f seconds\n" % click()) # Extract properties. Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) + # Calculate energy and dipole derivatives if needed. + if AGrad: + Results['derivatives'] = energy_derivatives(Engine, FF, mvals, h, pgrad, dipole='dipole' in Sim.timeseries.keys()) # Dump results to file logger.info("Writing final force field.\n") pvals = FF.make(mvals) diff --git a/src/gmxio.py b/src/gmxio.py index d6a9d1732..c651df3b1 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -1187,34 +1187,29 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, self.mdtraj = '%s-md.trr' % self.name # Call md_extract and return the prop_return dictionary (backward compatibility with old functionality.) - Extract = self.md_extract(OrderedDict([(i, 0) for i in ['potential', 'kinetic', 'dipole', 'components']])) - prop_return = {'Potentials': Extract['potential'], - 'Kinetics': Extract['kinetic'], - 'Dips': Extract['dipole'], - 'Ecomps': Extract['components']} - if self.pbc: - Extract_ = self.md_extract(OrderedDict([(i, 0) for i in ['density', 'volume']])) - prop_return['Rhos'] = Extract_['density'] - prop_return['Volumes'] = Extract_['volume'] - if bilayer: - Extract__ = self.md_extract(OrderedDict([(i, 0) for i in ['al', 'scd']])) - prop_return['Als'] = Extract__['al'] - prop_return['Scds'] = Extract__['scd'] - - if verbose: logger.info("Finished!\n") + old_map = {'potential' : 'Potentials', 'kinetic' : 'Kinetics', 'dipole' : 'Dips', 'components' : 'Ecomps', + 'density' : 'Rhos', 'volume' : 'Volumes', 'al' : 'Als', 'scd' : 'Scds'} + tsnames = ['potential', 'kinetic', 'dipole', 'components'] + if self.pbc: tsnames += ['density', 'volume'] + if bilayer: tsnames += ['al', 'scd'] + Extract = self.md_extract(tsnames) + prop_return = OrderedDict([(old_map[i], Extract[i]) for i in Extract.keys() if i in old_map]) return prop_return - def md_extract(self, tsspec, verbose=True): + def md_extract(self, 
tsnames, tsspec={}, verbose=True): """ Extract time series from the MD trajectory / energy file. Since Gromacs can do so many things in a single call to g_energy, we implement all the functionality in a single big function (it can be split off later.) + @param[in] tsnames List of tsnames, containing names of + timeseries that need to be evaluated. + @param[in] tsspec Dictionary with tsnames : tsparams key/value pairs. tsparams contains any extra information needed to calculate the observable (e.g. atom indices in S_cd) but it - may also be None. + isn't strictly required. @return answer Dictionary with tsnames : timeseries key/value pairs. The leading dimension of the time series is always the sample axis. @@ -1251,7 +1246,7 @@ def md_extract(self, tsspec, verbose=True): # Term names that we want to get from g_energy. ekeep = [] # Save anything that comes before Total-Energy if doing an energy component analysis. - if 'components' in tsspec: + if 'components' in tsnames: ecomp = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] ekeep += ecomp[:] # These are time series which can be directly copied from g_energy output. @@ -1259,10 +1254,10 @@ def md_extract(self, tsspec, verbose=True): 'temperature' : 'Temperature', 'pressure' : 'Pressure', 'volume' : 'Volume', 'density' : 'Density', 'pv' : 'pV'} for i in copy_keys: - if i in tsspec and copy_keys[i] not in ekeep: + if i in tsnames and copy_keys[i] not in ekeep: ekeep.append(copy_keys[i]) # Area per lipid requires Box-X and Box-Y time series. - if 'al' in tsspec: + if 'al' in tsnames: ekeep += ['Box-X', 'Box-Y'] ekeep = list(set(ekeep)) eksort = [] @@ -1276,45 +1271,44 @@ def md_extract(self, tsspec, verbose=True): # Perform energy component analysis and return properties. 
self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) - - DF = pd.DataFrame([[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)], columns=eksort, - index = pd.Index([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)], name='time')) + tarray = np.array([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)]) + times = pd.Index(tarray, name='time') + xvgdata = [[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)] + xvgdf = pd.DataFrame(xvgdata, columns=eksort, index = times) - # Okay, I'm not completely pandas-crazy yet. + # Attempt to use Pandas more effectively. Output = OrderedDict() + Output['time'] = tarray # Now take the output values from g_energy and allocate them into the Output dictionary. - for i in tsspec: + for i in tsnames: if i in copy_keys: - Output[i] = np.array(DF[copy_keys[i]]) - if 'components' in tsspec: - Components = OrderedDict() - for i in ecomp: - Components[i] = np.array(DF[i]) - Output['components'] = Components + Output[i] = np.array(xvgdf[copy_keys[i]]) + if 'components' in tsnames: + # Energy component analysis is a DataFrame. + Output['components'] = xvgdf[ecomp] # Area per lipid. # HARD CODED NUMBER: number of lipid molecules! - if 'al' in tsspec: - Output['al'] = np.array(DF['Box-X'])*np.array(DF['Box-Y']) / 64 + if 'al' in tsnames: + Output['al'] = np.array(xvgdf['Box-X'])*np.array(xvgdf['Box-Y']) / 64 # Deuterium order parameter. # HARD CODED: atom names of lipid tails! - if 'scd' in tsspec: + if 'scd' in tsnames: n_snap = self.n_snaps(nsteps, 1000, timestep) - Scds = self.calc_scd(n_snap, timestep) - Output['scd'] = Scds + Output['scd'] = self.calc_scd(n_snap, timestep) # Dipole moments; note we use g_dipoles and not the multipole_moments function. 
- if 'dipole' in tsspec: + if 'dipole' in tsnames: self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % (self.name, self.name, self.name), stdin="System\n") Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] - for line in open("%s-md-dip.xvg" % self.name)]) - + for line in open("%s-md-dip.xvg" % self.name)]) + + # We could convert it to a Panel if we wanted, but I'm not fully confident using it... return Output - def md(self, nsteps=0, nequil=0, verbose=False, deffnm=None, **kwargs): diff --git a/src/nifty.py b/src/nifty.py index d51999d1e..938b4dd2a 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -320,6 +320,14 @@ def flat(vec): """ return np.array(vec).reshape(-1) +def getval(dframe, col): + """ Extract the single non-NaN value from a column. """ + nnan = [i for i in dframe[col] if not isnpnan(i)] + if len(nnan) != 1: + logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) + raise RuntimeError + return nnan[0] + def monotonic(arr, start, end): # Make sure an array is monotonically decreasing from the start to the end. 
a0 = arr[start] diff --git a/src/observable.py b/src/observable.py index 2c6d7858e..23fb6a0cc 100644 --- a/src/observable.py +++ b/src/observable.py @@ -3,8 +3,9 @@ from forcebalance.finite_difference import fdwrap, f12d3p from forcebalance.molecule import Molecule -from forcebalance.nifty import col, flat, statisticalInefficiency -from forcebalance.nifty import printcool +from forcebalance.nifty import col, flat, getval +from forcebalance.nifty import printcool, statisticalInefficiency +from forcebalance.optimizer import Counter from collections import OrderedDict @@ -14,8 +15,7 @@ # method mean_stderr def mean_stderr(ts): """Return mean and standard deviation of a time series ts.""" - return np.mean(ts), \ - np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts)) + return np.mean(ts), np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts)) # method energy_derivatives def energy_derivatives(engine, FF, mvals, h, pgrad, length, AGrad=True): @@ -75,11 +75,12 @@ class Observable(object): Identifier for the observable that is specified in `observables` in Target options. """ - def __init__(self, source, name=None): + def __init__(self, source): # Reference data which can be useful in calculating the observable. + if 'temp' in source: self.temp = getval(source, 'temp') + if 'pres' in source: self.pres = getval(source, 'pres') self.Data = source[self.columns] - self.name = name if name is not None else "empty" - + def __str__(self): return "Observable = " + self.name.capitalize() + "; Columns = " + ', '.join(self.columns) @@ -114,6 +115,26 @@ def extract(self, engines, FF, mvals, h, AGrad=True): logger.error("Extract method not implemented in base class.\n") raise NotImplementedError + def aggregate(self, Sims, AGrad, cycle=None): + print self.name + if cycle == None: cycle = Counter() + # Different from the Results objects in the Simulation, this + # one is keyed by the simulation type then by the time series + # data type. 
+ self.TimeSeries = OrderedDict([(i, OrderedDict()) for i, j in self.requires.items()]) + for stype in self.requires: + for dtype in self.requires[stype]: + self.TimeSeries[stype][dtype] = np.concatenate([Sim.Results[cycle][dtype] for Sim in Sims if Sim.type == stype]) + if AGrad: + # Also aggregate the derivative information along the second axis (snapshot axis) + self.Derivatives = OrderedDict() + for stype in self.requires: + # The derivatives that we have may be obtained from the 'derivatives' data structure of the first Simulation + # that matches the required simulation type. + self.Derivatives[stype] = OrderedDict() + for dtype in [Sim.Results[cycle]['derivatives'].keys() for Sim in Sims if Sim.type == stype][0]: + self.Derivatives[stype][dtype] = np.concatenate([Sim.Results[cycle]['derivatives'][dtype] for Sim in Sims if Sim.type == stype], axis=1) + # class Observable_Density class Observable_Density(Observable): @@ -136,66 +157,30 @@ def __init__(self, source): self.columns = ['density'] super(Observable_Density, self).__init__(source) - def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): + def evaluate(self, AGrad): #==========================================# # Physical constants and local variables. # #==========================================# # Energies in kJ/mol and lengths in nanometers. kB = 0.008314472471220214 - kT = kB*self.temperature + kT = kB*self.temp Beta = 1.0/kT mBeta = -Beta - - #======================================================# - # Get simulation properties depending on the engines. # - #======================================================# - if self.engname == "gromacs": - # Default name - deffnm = os.path.basename(os.path.splitext(engines[0].mdene)[0]) - # What energy terms are there and what is their order - energyterms = engines[0].energy_termnames(edrfile="%s.%s" % (deffnm, "edr")) - # Grab energy terms to print and keep track of energy term order. 
- ekeep = ['Total-Energy', 'Potential', 'Kinetic-En.', 'Temperature'] - ekeep += ['Volume', 'Density'] - - ekeep_order = [key for (key, value) in - sorted(energyterms.items(), key=lambda (k, v) : v) - if key in ekeep] - - # Perform energy component analysis and return properties. - engines[0].callgmx(("g_energy " + - "-f %s.%s " % (deffnm, "edr") + - "-o %s-energy.xvg " % deffnm + - "-xvg no"), - stdin="\n".join(ekeep)) - - # Read data and store properties by grabbing columns in right order. - data = np.loadtxt("%s-energy.xvg" % deffnm) - Energy = data[:, ekeep_order.index("Total-Energy") + 1] - Potential = data[:, ekeep_order.index("Potential") + 1] - Kinetic = data[:, ekeep_order.index("Kinetic-En.") + 1] - Volume = data[:, ekeep_order.index("Volume") + 1] - Temperature = data[:, ekeep_order.index("Temperature") + 1] - Density = data[:, ekeep_order.index("Density") + 1] - - #============================================# - # Compute the potential energy derivatives. # - #============================================# - logger.info(("Calculating potential energy derivatives " + - "with finite difference step size: %f\n" % h)) - printcool("Initializing array to length %i" % len(Energy), - color=4, bold=True) - G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) - - #=========================================# - # Observable properties and derivatives. # - #=========================================# + phase = self.requires.keys()[0] + # Density time series. + Density = self.TimeSeries[phase]['density'] # Average and error. Rho_avg, Rho_err = mean_stderr(Density) - # Analytic first derivative. - Rho_grad = mBeta * (flat(np.mat(G) * col(Density)) / len(Density) \ - - np.mean(Density) * np.mean(G, axis=1)) - return Rho_avg, Rho_err, Rho_grad + Answer = OrderedDict() + Answer['mean'] = Rho_avg + Answer['stderr'] = Rho_err + if AGrad: + G = self.Derivatives[phase]['potential'] + # Analytic first derivative. 
+ Rho_grad = mBeta * (flat(np.matrix(G) * col(Density)) / len(Density) + - np.mean(Density) * np.mean(G, axis=1)) + Answer['grad'] = Rho_grad + return Answer class Liquid_Density(Observable_Density): def __init__(self, source): diff --git a/src/thermo.py b/src/thermo.py index 4447d81d7..76a20d1cf 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -12,12 +12,13 @@ from forcebalance.molecule import Molecule from forcebalance.observable import OMap from forcebalance.target import Target -from forcebalance.finite_difference import in_fd +from forcebalance.finite_difference import in_fd, fdwrap, f12d3p from forcebalance.nifty import flat, col, row, isint, isnpnan -from forcebalance.nifty import lp_dump, lp_load, wopen, _exec +from forcebalance.nifty import lp_dump, lp_load, wopen, _exec, getval from forcebalance.nifty import GoInto, LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary from forcebalance.nifty import getWorkQueue +from forcebalance.optimizer import Counter from collections import defaultdict, OrderedDict @@ -27,14 +28,6 @@ # print logger.parent.parent.handlers[0] # logger.parent.parent.handlers = [] -def getval(dframe, col): - """ Extract the single non-NaN value from a column. """ - nnan = [i for i in dframe[col] if not isnpnan(i)] - if len(nnan) != 1: - logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) - raise RuntimeError - return nnan[0] - class TextParser(object): """ Parse a text file. """ def __init__(self, fnm): @@ -436,6 +429,53 @@ def find_file(tgtdir, index, stype, sufs, iscrd, icn=0): raise RuntimeError return found, 0 if numbered else icn +def energy_derivatives(engine, FF, mvals, h, pgrad, dipole=False): + + """ + Compute the first and second derivatives of a set of snapshot + energies with respect to the force field parameters. + + This basically calls the finite difference subroutine on the + energy_driver subroutine also in this script. 
+ + In the future we may need to be more sophisticated with + controlling the quantities which are differentiated, but for + now this is okay.. + + @param[in] engine Engine object for calculating energies + @param[in] FF Force field object + @param[in] mvals Mathematical parameter values + @param[in] h Finite difference step size + @param[in] pgrad List of active parameters for differentiation + @param[in] dipole Switch for dipole derivatives. + @return G First derivative of the energies in a N_param x N_coord array + @return GDx First derivative of the box dipole moment x-component in a N_param x N_coord array + @return GDy First derivative of the box dipole moment y-component in a N_param x N_coord array + @return GDz First derivative of the box dipole moment z-component in a N_param x N_coord array + + """ + def single_point(mvals_): + FF.make(mvals_) + if dipole: + return engine.energy_dipole() + else: + return engine.energy() + + ED0 = single_point(mvals) + G = OrderedDict() + G['potential'] = np.zeros((FF.np, ED0.shape[0])) + if dipole: + G['dipole'] = np.zeros((FF.np, ED0.shape[0], 3)) + for i in pgrad: + logger.info("%i %s\r" % (i, (FF.plist[i] + " "*30))) + edg, _ = f12d3p(fdwrap(single_point,mvals,i),h,f0=ED0) + if dipole: + G['potential'][i] = edg[:,0] + G['dipole'][i] = edg[:,1:] + else: + G['potential'][i] = edg[:] + return G + class Thermo(Target): """ A target for fitting general experimental data sets. The source @@ -634,6 +674,8 @@ def floatcol(col): self.Data[col] = self.Data[col].astype(float) intcol('n_ic') + floatcol('temp') + floatcol('pres') # A list of indices (i.e. top-level indices) which correspond # to sets of simulations that we'll be running. 
@@ -685,7 +727,7 @@ def initialize_observables(self): Objs = [] Reqs = [] for OClass in OMap[oname]: - OObj = OClass(self.Data) + OObj = OClass(self.Data.ix[index]) Reqs.append(OObj.requires.keys()) if all([i in self.SimNames for i in OObj.requires.keys()]): Objs.append(OObj) @@ -776,6 +818,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): for Sim in self.Simulations[index]: Sim.gradient = AGrad simd = os.path.join(os.getcwd(), index, Sim.name) + Sim.RunDirs[Counter()] = simd GoInto(simd) # Submit or run the simulation if the result file does not exist. if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): @@ -807,7 +850,7 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): # if wq == None: # logger.info("Running condensed phase simulation locally.\n") # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) - _exec(cmdstr, copy_stderr=False, outfnm='md_one.out') + _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') # else: # queue_up(wq, command = cmdstr+' &> npt.out', # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], @@ -815,39 +858,6 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): os.chdir(cwd) return - def retrieve(self, dp): - """Retrieve the molecular dynamics (MD) results and store the calculated - observables in the Point object dp. - - Parameters - ---------- - dp : Point - Store the calculated observables in this point. 
- - Returns - ------- - Nothing - - """ - abspath = os.path.join(os.getcwd(), '%d/md_result.p' % dp.idnr) - - if os.path.exists(abspath): - logger.info('Reading data from ' + abspath + '.\n') - - vals, errs, grads = lp_load(open(abspath)) - - dp.data["values"] = vals - dp.data["errors"] = errs - dp.data["grads"] = grads - - else: - msg = 'The file ' + abspath + ' does not exist so we cannot read it.\n' - logger.warning(msg) - - dp.data["values"] = np.zeros((len(self.observables))) - dp.data["errors"] = np.zeros((len(self.observables))) - dp.data["grads"] = np.zeros((len(self.observables), self.FF.np)) - def indicate(self): """Shows optimization state.""" return @@ -995,6 +1005,18 @@ def get(self, mvals, AGrad=True, AHess=True): Objective = 0.0 Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) + + # Retrieve simulation results. + for index in self.Indices: + for Sim in self.Simulations[index]: + Sim.retrieve() + + # Calculate observable values. + for oname in self.Observables.keys(): + for index in self.Indices: + self.Observables[oname][index].aggregate(self.Simulations[index], AGrad) + if oname == 'density': self.Observables[oname][index].evaluate(AGrad) + return { "X": Objective, "G": Gradient, "H": Hessian} for pt in self.points: @@ -1120,10 +1142,13 @@ def __init__(self, tgt, data, name, index, stype, icn, tsnames): self.Data = copy.deepcopy(data) # Type of the simulation (map to simulation settings) self.type = stype + # Root directory of the ForceBalance job + self.root = tgt.root # Locate the initial coordinate file and frame number. self.initial, self.iframe = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, tgt.crdsfx, True, icn) # The time series for the simulation. self.timeseries = OrderedDict([(i, []) for i in tsnames]) + self.timeseries['potential'] = [] # ALWAYS require the potential energy. # The file extension that the coordinate file will be written with. 
self.fext = os.path.splitext(self.initial)[1] # Auxiliary files to be copied to the current location prior to running the simulation. @@ -1139,6 +1164,10 @@ def __init__(self, tgt, data, name, index, stype, icn, tsnames): self.h = tgt.h # Active parameters to differentiate over. self.pgrad = tgt.pgrad + # List of ITERATION : directory pairs. + self.RunDirs = OrderedDict() + # List of ITERATION : result data structures. + self.Results = OrderedDict() pbc = Simulation.type_settings[self.type]['pbc'] @@ -1214,3 +1243,27 @@ def __str__(self): msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) return "\n".join(msg) + + def retrieve(self, cycle=None): + """Retrieve the molecular dynamics (MD) results and store the calculated + observables in the Simulation object. + + Parameters + ---------- + dp : Point + Store the calculated observables in this point. + + Returns + ------- + Nothing + + """ + if cycle == None: cycle = Counter() + + abspath = os.path.join(self.RunDirs[cycle], 'md_result.p') + if os.path.exists(abspath): + logger.info('Simulation %s reading data from ' % self.name + abspath.replace(self.root+'/', '') + ' .\n') + self.Results[cycle] = lp_load(open(abspath)) + else: + logger.warning('The file ' + abspath + ' does not exist so we cannot read it.\n') + self.Results[cycle] = None From 2af905ff5190d8d4ec74a6d60b7f0ebaa9a3f33b Mon Sep 17 00:00:00 2001 From: leeping Date: Tue, 29 Apr 2014 07:07:46 -0700 Subject: [PATCH 25/25] Fix occasional failure in topology building --- src/molecule.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/molecule.py b/src/molecule.py index f423ec254..0d69eaa81 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -1419,20 +1419,20 @@ def build_topology(self, sn=None, Fac=1.2): zidx = -1 for j in xgrd: xi = self.xyzs[sn][i][0] - if toppbc and xi < 0: xi += xmax - if toppbc and xi > xmax: xi -= 
xmax + while toppbc and xi < 0: xi += xmax + while toppbc and xi > xmax: xi -= xmax if xi < j: break xidx += 1 for j in ygrd: yi = self.xyzs[sn][i][1] - if toppbc and yi < 0: yi += ymax - if toppbc and yi > ymax: yi -= ymax + while toppbc and yi < 0: yi += ymax + while toppbc and yi > ymax: yi -= ymax if yi < j: break yidx += 1 for j in zgrd: zi = self.xyzs[sn][i][2] - if toppbc and zi < 0: zi += zmax - if toppbc and zi > zmax: zi -= zmax + while toppbc and zi < 0: zi += zmax + while toppbc and zi > zmax: zi -= zmax if zi < j: break zidx += 1 gasn[(xidx,yidx,zidx)].append(i)